##// END OF EJS Templates
move encoding bits from util to encoding...
Matt Mackall -
r7948:de377b1a default
parent child Browse files
Show More
@@ -0,0 +1,77 b''
1 """
2 encoding.py - character transcoding support for Mercurial
3
4 Copyright 2005-2009 Matt Mackall <mpm@selenic.com> and others
5
6 This software may be used and distributed according to the terms of
7 the GNU General Public License version 2, incorporated herein by
8 reference.
9 """
10
11 import sys, unicodedata, locale, os, error
12
# Aliases under which some locales report ASCII, mapped to the
# canonical codec name.
_encodingfixup = {'646': 'ascii', 'ANSI_X3.4-1968': 'ascii'}

def _detectencoding():
    """
    Work out the local character encoding

    HGENCODING from the environment wins if set; otherwise the locale
    is consulted. Falls back to 'ascii' when nothing is reported or
    the locale machinery raises locale.Error.
    """
    try:
        name = os.environ.get("HGENCODING")
        if not name and sys.platform == 'darwin':
            # On darwin, getpreferredencoding ignores the locale
            # environment and always returns mac-roman. We override
            # this if the environment is not C (has been customized
            # by the user).
            locale.setlocale(locale.LC_CTYPE, '')
            name = locale.getlocale()[1]
        if not name:
            name = locale.getpreferredencoding() or 'ascii'
        return _encodingfixup.get(name, name)
    except locale.Error:
        return 'ascii'

# local encoding used by tolocal(), fromlocal() and colwidth()
encoding = _detectencoding()
# error handler passed to decode() in fromlocal()
encodingmode = os.environ.get("HGENCODINGMODE", "strict")
# second encoding tried by tolocal() when strict UTF-8 decoding fails
fallbackencoding = 'ISO-8859-1'
30
def tolocal(s):
    """
    Convert a string from internal UTF-8 to the local encoding

    Strict decoding is tried first as UTF-8 and then as
    fallbackencoding, since repositories predating locale support may
    hold latin1 or other character sets. If both fail, the string is
    decoded as UTF-8 with undecodable bytes replaced. The result is
    always encoded to the local encoding with "replace", so characters
    the local encoding cannot represent are substituted rather than
    raising.

    Raises error.Abort if a codec name is unknown.
    """
    for candidate in ('UTF-8', fallbackencoding):
        try:
            # strict decode, lenient encode
            return s.decode(candidate).encode(encoding, "replace")
        except UnicodeDecodeError:
            # not valid in this charset, try the next one
            continue
        except LookupError:
            # unknown codec name; sys.exc_info()[1] is the exception
            # instance being handled
            raise error.Abort("%s, please check your locale settings"
                              % sys.exc_info()[1])
    # last ditch: UTF-8 with replacement of undecodable bytes
    return s.decode("utf-8", "replace").encode(encoding, "replace")
51
def fromlocal(s):
    """
    Convert a string from the local character encoding to UTF-8

    The string is decoded with the local encoding using the error
    mode in encodingmode (taken from HGENCODINGMODE, 'strict' by
    default). With 'strict', undecodable input aborts with a message
    quoting the bytes around the failure; 'replace' substitutes a
    special Unicode character and 'ignore' drops the offending
    character.

    Raises error.Abort on undecodable input or an unknown codec name.
    """
    try:
        decoded = s.decode(encoding, encodingmode)
        return decoded.encode("utf-8")
    except UnicodeDecodeError:
        inst = sys.exc_info()[1]
        # quote up to ten bytes either side of the failure position
        first = max(0, inst.start - 10)
        context = s[first:inst.start + 10]
        raise error.Abort("decoding near '%s': %s!" % (context, inst))
    except LookupError:
        raise error.Abort("%s, please check your locale settings"
                          % sys.exc_info()[1])
69
def colwidth(s):
    "Find the display column width of a string in the local encoding"
    chars = s.decode(encoding, 'replace')
    if not hasattr(unicodedata, 'east_asian_width'):
        # no width data available: count one column per character
        return len(chars)
    width = unicodedata.east_asian_width
    total = 0
    for ch in chars:
        # wide (W) and fullwidth (F) characters count as two columns
        if width(ch) in 'WF':
            total += 2
        else:
            total += 1
    return total
77
@@ -18,7 +18,7 b' from p4 import p4_source'
18 import filemap
18 import filemap
19
19
20 import os, shutil
20 import os, shutil
21 from mercurial import hg, util
21 from mercurial import hg, util, encoding
22 from mercurial.i18n import _
22 from mercurial.i18n import _
23
23
24 orig_encoding = 'ascii'
24 orig_encoding = 'ascii'
@@ -275,7 +275,7 b' class converter(object):'
275 if "\n" in desc:
275 if "\n" in desc:
276 desc = desc.splitlines()[0]
276 desc = desc.splitlines()[0]
277 # convert log message to local encoding without using
277 # convert log message to local encoding without using
278 # tolocal() because util._encoding conver() use it as
278 # tolocal() because convert() sets encoding.encoding to
279 # 'utf-8'
279 # 'utf-8'
280 self.ui.status("%d %s\n" % (num, recode(desc)))
280 self.ui.status("%d %s\n" % (num, recode(desc)))
281 self.ui.note(_("source: %s\n") % recode(c))
281 self.ui.note(_("source: %s\n") % recode(c))
@@ -308,8 +308,8 b' class converter(object):'
308
308
309 def convert(ui, src, dest=None, revmapfile=None, **opts):
309 def convert(ui, src, dest=None, revmapfile=None, **opts):
310 global orig_encoding
310 global orig_encoding
311 orig_encoding = util._encoding
311 orig_encoding = encoding.encoding
312 util._encoding = 'UTF-8'
312 encoding.encoding = 'UTF-8'
313
313
314 if not dest:
314 if not dest:
315 dest = hg.defaultdest(src) + "-hg"
315 dest = hg.defaultdest(src) + "-hg"
@@ -6,7 +6,7 b''
6 from mercurial import demandimport
6 from mercurial import demandimport
7 demandimport.ignore.extend(['pkgutil', 'pkg_resources', '__main__',])
7 demandimport.ignore.extend(['pkgutil', 'pkg_resources', '__main__',])
8
8
9 from mercurial import util
9 from mercurial import util, encoding
10 from mercurial.templatefilters import filters
10 from mercurial.templatefilters import filters
11
11
12 from pygments import highlight
12 from pygments import highlight
@@ -30,19 +30,19 b' def pygmentize(field, fctx, style, tmpl)'
30 return
30 return
31
31
32 # avoid UnicodeDecodeError in pygments
32 # avoid UnicodeDecodeError in pygments
33 text = util.tolocal(text)
33 text = encoding.tolocal(text)
34
34
35 # To get multi-line strings right, we can't format line-by-line
35 # To get multi-line strings right, we can't format line-by-line
36 try:
36 try:
37 lexer = guess_lexer_for_filename(fctx.path(), text[:1024],
37 lexer = guess_lexer_for_filename(fctx.path(), text[:1024],
38 encoding=util._encoding)
38 encoding=encoding.encoding)
39 except (ClassNotFound, ValueError):
39 except (ClassNotFound, ValueError):
40 try:
40 try:
41 lexer = guess_lexer(text[:1024], encoding=util._encoding)
41 lexer = guess_lexer(text[:1024], encoding=encoding.encoding)
42 except (ClassNotFound, ValueError):
42 except (ClassNotFound, ValueError):
43 lexer = TextLexer(encoding=util._encoding)
43 lexer = TextLexer(encoding=encoding.encoding)
44
44
45 formatter = HtmlFormatter(style=style, encoding=util._encoding)
45 formatter = HtmlFormatter(style=style, encoding=encoding.encoding)
46
46
47 colorized = highlight(text, lexer, formatter)
47 colorized = highlight(text, lexer, formatter)
48 # strip wrapping div
48 # strip wrapping div
@@ -36,19 +36,19 b' To use this extension, enable the extens'
36 [extensions]
36 [extensions]
37 hgext.win32mbcs =
37 hgext.win32mbcs =
38
38
39 Path encoding conversion are done between unicode and util._encoding
39 Path encoding conversions are done between unicode and encoding.encoding
40 which is decided by mercurial from current locale setting or HGENCODING.
40 which is decided by mercurial from current locale setting or HGENCODING.
41
41
42 """
42 """
43
43
44 import os
44 import os
45 from mercurial.i18n import _
45 from mercurial.i18n import _
46 from mercurial import util
46 from mercurial import util, encoding
47
47
48 def decode(arg):
48 def decode(arg):
49 if isinstance(arg, str):
49 if isinstance(arg, str):
50 uarg = arg.decode(util._encoding)
50 uarg = arg.decode(encoding.encoding)
51 if arg == uarg.encode(util._encoding):
51 if arg == uarg.encode(encoding.encoding):
52 return uarg
52 return uarg
53 raise UnicodeError("Not local encoding")
53 raise UnicodeError("Not local encoding")
54 elif isinstance(arg, tuple):
54 elif isinstance(arg, tuple):
@@ -59,7 +59,7 b' def decode(arg):'
59
59
60 def encode(arg):
60 def encode(arg):
61 if isinstance(arg, unicode):
61 if isinstance(arg, unicode):
62 return arg.encode(util._encoding)
62 return arg.encode(encoding.encoding)
63 elif isinstance(arg, tuple):
63 elif isinstance(arg, tuple):
64 return tuple(map(encode, arg))
64 return tuple(map(encode, arg))
65 elif isinstance(arg, list):
65 elif isinstance(arg, list):
@@ -76,10 +76,10 b' def wrapper(func, args):'
76 # convert arguments to unicode, call func, then convert back
76 # convert arguments to unicode, call func, then convert back
77 return encode(func(*decode(args)))
77 return encode(func(*decode(args)))
78 except UnicodeError:
78 except UnicodeError:
79 # If not encoded with util._encoding, report it then
79 # If not encoded with encoding.encoding, report it then
80 # continue with calling original function.
80 # continue with calling original function.
81 raise util.Abort(_("[win32mbcs] filename conversion fail with"
81 raise util.Abort(_("[win32mbcs] filename conversion fail with"
82 " %s encoding\n") % (util._encoding))
82 " %s encoding\n") % (encoding.encoding))
83
83
84 def wrapname(name):
84 def wrapname(name):
85 idx = name.rfind('.')
85 idx = name.rfind('.')
@@ -115,8 +115,9 b' def reposetup(ui, repo):'
115 return
115 return
116
116
117 # fake is only for relevant environment.
117 # fake is only for relevant environment.
118 if util._encoding.lower() in problematic_encodings.split():
118 if encoding.encoding.lower() in problematic_encodings.split():
119 for f in funcs.split():
119 for f in funcs.split():
120 wrapname(f)
120 wrapname(f)
121 ui.debug(_("[win32mbcs] activated with encoding: %s\n") % util._encoding)
121 ui.debug(_("[win32mbcs] activated with encoding: %s\n")
122 % encoding.encoding)
122
123
@@ -7,7 +7,7 b''
7
7
8 from node import bin, hex, nullid
8 from node import bin, hex, nullid
9 from i18n import _
9 from i18n import _
10 import util, error, revlog
10 import util, error, revlog, encoding
11
11
12 def _string_escape(text):
12 def _string_escape(text):
13 """
13 """
@@ -175,10 +175,10 b' class changelog(revlog.revlog):'
175 if not text:
175 if not text:
176 return (nullid, "", (0, 0), [], "", {'branch': 'default'})
176 return (nullid, "", (0, 0), [], "", {'branch': 'default'})
177 last = text.index("\n\n")
177 last = text.index("\n\n")
178 desc = util.tolocal(text[last + 2:])
178 desc = encoding.tolocal(text[last + 2:])
179 l = text[:last].split('\n')
179 l = text[:last].split('\n')
180 manifest = bin(l[0])
180 manifest = bin(l[0])
181 user = util.tolocal(l[1])
181 user = encoding.tolocal(l[1])
182
182
183 extra_data = l[2].split(' ', 2)
183 extra_data = l[2].split(' ', 2)
184 if len(extra_data) != 3:
184 if len(extra_data) != 3:
@@ -205,7 +205,7 b' class changelog(revlog.revlog):'
205 if "\n" in user:
205 if "\n" in user:
206 raise error.RevlogError(_("username %s contains a newline")
206 raise error.RevlogError(_("username %s contains a newline")
207 % repr(user))
207 % repr(user))
208 user, desc = util.fromlocal(user), util.fromlocal(desc)
208 user, desc = encoding.fromlocal(user), encoding.fromlocal(desc)
209
209
210 if date:
210 if date:
211 parseddate = "%d %d" % util.parsedate(date)
211 parseddate = "%d %d" % util.parsedate(date)
@@ -7,7 +7,7 b''
7
7
8 from node import hex, nullid, nullrev, short
8 from node import hex, nullid, nullrev, short
9 from i18n import _
9 from i18n import _
10 import os, sys, bisect, stat
10 import os, sys, bisect, stat, encoding
11 import mdiff, bdiff, util, templater, templatefilters, patch, errno, error
11 import mdiff, bdiff, util, templater, templatefilters, patch, errno, error
12 import match as _match
12 import match as _match
13
13
@@ -626,7 +626,7 b' class changeset_printer(object):'
626
626
627 # don't show the default branch name
627 # don't show the default branch name
628 if branch != 'default':
628 if branch != 'default':
629 branch = util.tolocal(branch)
629 branch = encoding.tolocal(branch)
630 self.ui.write(_("branch: %s\n") % branch)
630 self.ui.write(_("branch: %s\n") % branch)
631 for tag in self.repo.nodetags(changenode):
631 for tag in self.repo.nodetags(changenode):
632 self.ui.write(_("tag: %s\n") % tag)
632 self.ui.write(_("tag: %s\n") % tag)
@@ -791,7 +791,7 b' class changeset_templater(changeset_prin'
791 def showbranches(**args):
791 def showbranches(**args):
792 branch = ctx.branch()
792 branch = ctx.branch()
793 if branch != 'default':
793 if branch != 'default':
794 branch = util.tolocal(branch)
794 branch = encoding.tolocal(branch)
795 return showlist('branch', [branch], plural='branches', **args)
795 return showlist('branch', [branch], plural='branches', **args)
796
796
797 def showparents(**args):
797 def showparents(**args):
@@ -9,7 +9,7 b' from node import hex, nullid, nullrev, s'
9 from i18n import _, gettext
9 from i18n import _, gettext
10 import os, re, sys
10 import os, re, sys
11 import hg, util, revlog, bundlerepo, extensions, copies, context, error
11 import hg, util, revlog, bundlerepo, extensions, copies, context, error
12 import difflib, patch, time, help, mdiff, tempfile, url
12 import difflib, patch, time, help, mdiff, tempfile, url, encoding
13 import archival, changegroup, cmdutil, hgweb.server, sshserver, hbisect
13 import archival, changegroup, cmdutil, hgweb.server, sshserver, hbisect
14 import merge as merge_
14 import merge as merge_
15
15
@@ -415,10 +415,10 b' def branch(ui, repo, label=None, **opts)'
415 if label not in [p.branch() for p in repo.parents()]:
415 if label not in [p.branch() for p in repo.parents()]:
416 raise util.Abort(_('a branch of the same name already exists'
416 raise util.Abort(_('a branch of the same name already exists'
417 ' (use --force to override)'))
417 ' (use --force to override)'))
418 repo.dirstate.setbranch(util.fromlocal(label))
418 repo.dirstate.setbranch(encoding.fromlocal(label))
419 ui.status(_('marked working directory as branch %s\n') % label)
419 ui.status(_('marked working directory as branch %s\n') % label)
420 else:
420 else:
421 ui.write("%s\n" % util.tolocal(repo.dirstate.branch()))
421 ui.write("%s\n" % encoding.tolocal(repo.dirstate.branch()))
422
422
423 def branches(ui, repo, active=False):
423 def branches(ui, repo, active=False):
424 """list repository named branches
424 """list repository named branches
@@ -431,7 +431,7 b' def branches(ui, repo, active=False):'
431 Use the command 'hg update' to switch to an existing branch.
431 Use the command 'hg update' to switch to an existing branch.
432 """
432 """
433 hexfunc = ui.debugflag and hex or short
433 hexfunc = ui.debugflag and hex or short
434 activebranches = [util.tolocal(repo[n].branch())
434 activebranches = [encoding.tolocal(repo[n].branch())
435 for n in repo.heads(closed=False)]
435 for n in repo.heads(closed=False)]
436 branches = util.sort([(tag in activebranches, repo.changelog.rev(node), tag)
436 branches = util.sort([(tag in activebranches, repo.changelog.rev(node), tag)
437 for tag, node in repo.branchtags().items()])
437 for tag, node in repo.branchtags().items()])
@@ -449,7 +449,7 b' def branches(ui, repo, active=False):'
449 notice = ' (closed)'
449 notice = ' (closed)'
450 else:
450 else:
451 notice = ' (inactive)'
451 notice = ' (inactive)'
452 rev = str(node).rjust(31 - util.colwidth(tag))
452 rev = str(node).rjust(31 - encoding.colwidth(tag))
453 data = tag, rev, hexfunc(hn), notice
453 data = tag, rev, hexfunc(hn), notice
454 ui.write("%s %s:%s%s\n" % data)
454 ui.write("%s %s:%s%s\n" % data)
455
455
@@ -882,9 +882,9 b' def debuginstall(ui):'
882 problems = 0
882 problems = 0
883
883
884 # encoding
884 # encoding
885 ui.status(_("Checking encoding (%s)...\n") % util._encoding)
885 ui.status(_("Checking encoding (%s)...\n") % encoding.encoding)
886 try:
886 try:
887 util.fromlocal("test")
887 encoding.fromlocal("test")
888 except util.Abort, inst:
888 except util.Abort, inst:
889 ui.write(" %s\n" % inst)
889 ui.write(" %s\n" % inst)
890 ui.write(_(" (check that your locale is properly set)\n"))
890 ui.write(_(" (check that your locale is properly set)\n"))
@@ -1579,7 +1579,7 b' def identify(ui, repo, source=None,'
1579 output.append(str(ctx.rev()))
1579 output.append(str(ctx.rev()))
1580
1580
1581 if repo.local() and default and not ui.quiet:
1581 if repo.local() and default and not ui.quiet:
1582 b = util.tolocal(ctx.branch())
1582 b = encoding.tolocal(ctx.branch())
1583 if b != 'default':
1583 if b != 'default':
1584 output.append("(%s)" % b)
1584 output.append("(%s)" % b)
1585
1585
@@ -1589,7 +1589,7 b' def identify(ui, repo, source=None,'
1589 output.append(t)
1589 output.append(t)
1590
1590
1591 if branch:
1591 if branch:
1592 output.append(util.tolocal(ctx.branch()))
1592 output.append(encoding.tolocal(ctx.branch()))
1593
1593
1594 if tags:
1594 if tags:
1595 output.extend(ctx.tags())
1595 output.extend(ctx.tags())
@@ -2855,7 +2855,7 b' def tags(ui, repo):'
2855 except error.LookupError:
2855 except error.LookupError:
2856 r = " ?:%s" % hn
2856 r = " ?:%s" % hn
2857 else:
2857 else:
2858 spaces = " " * (30 - util.colwidth(t))
2858 spaces = " " * (30 - encoding.colwidth(t))
2859 if ui.verbose:
2859 if ui.verbose:
2860 if repo.tagtype(t) == 'local':
2860 if repo.tagtype(t) == 'local':
2861 tagtype = " local"
2861 tagtype = " local"
@@ -2976,8 +2976,9 b' globalopts = ['
2976 ('', 'config', [], _('set/override config option')),
2976 ('', 'config', [], _('set/override config option')),
2977 ('', 'debug', None, _('enable debugging output')),
2977 ('', 'debug', None, _('enable debugging output')),
2978 ('', 'debugger', None, _('start debugger')),
2978 ('', 'debugger', None, _('start debugger')),
2979 ('', 'encoding', util._encoding, _('set the charset encoding')),
2979 ('', 'encoding', encoding.encoding, _('set the charset encoding')),
2980 ('', 'encodingmode', util._encodingmode, _('set the charset encoding mode')),
2980 ('', 'encodingmode', encoding.encodingmode,
2981 _('set the charset encoding mode')),
2981 ('', 'lsprof', None, _('print improved command execution profile')),
2982 ('', 'lsprof', None, _('print improved command execution profile')),
2982 ('', 'traceback', None, _('print traceback on exception')),
2983 ('', 'traceback', None, _('print traceback on exception')),
2983 ('', 'time', None, _('time how long the command takes')),
2984 ('', 'time', None, _('time how long the command takes')),
@@ -8,7 +8,7 b''
8 from i18n import _
8 from i18n import _
9 import os, sys, atexit, signal, pdb, socket, errno, shlex, time
9 import os, sys, atexit, signal, pdb, socket, errno, shlex, time
10 import util, commands, hg, fancyopts, extensions, hook, error
10 import util, commands, hg, fancyopts, extensions, hook, error
11 import cmdutil
11 import cmdutil, encoding
12 import ui as _ui
12 import ui as _ui
13
13
14 def run():
14 def run():
@@ -304,7 +304,7 b' def _dispatch(ui, args):'
304 # check for fallback encoding
304 # check for fallback encoding
305 fallback = lui.config('ui', 'fallbackencoding')
305 fallback = lui.config('ui', 'fallbackencoding')
306 if fallback:
306 if fallback:
307 util._fallbackencoding = fallback
307 encoding.fallbackencoding = fallback
308
308
309 fullargs = args
309 fullargs = args
310 cmd, func, args, options, cmdoptions = _parse(lui, args)
310 cmd, func, args, options, cmdoptions = _parse(lui, args)
@@ -319,9 +319,9 b' def _dispatch(ui, args):'
319 "and --repository may only be abbreviated as --repo!"))
319 "and --repository may only be abbreviated as --repo!"))
320
320
321 if options["encoding"]:
321 if options["encoding"]:
322 util._encoding = options["encoding"]
322 encoding.encoding = options["encoding"]
323 if options["encodingmode"]:
323 if options["encodingmode"]:
324 util._encodingmode = options["encodingmode"]
324 encoding.encodingmode = options["encodingmode"]
325 if options["time"]:
325 if options["time"]:
326 def get_times():
326 def get_times():
327 t = os.times()
327 t = os.times()
@@ -7,7 +7,7 b''
7 # of the GNU General Public License, incorporated herein by reference.
7 # of the GNU General Public License, incorporated herein by reference.
8
8
9 import os
9 import os
10 from mercurial import ui, hg, util, hook, error
10 from mercurial import ui, hg, util, hook, error, encoding
11 from mercurial import templater, templatefilters
11 from mercurial import templater, templatefilters
12 from common import get_mtime, style_map, ErrorResponse
12 from common import get_mtime, style_map, ErrorResponse
13 from common import HTTP_OK, HTTP_BAD_REQUEST, HTTP_NOT_FOUND, HTTP_SERVER_ERROR
13 from common import HTTP_OK, HTTP_BAD_REQUEST, HTTP_NOT_FOUND, HTTP_SERVER_ERROR
@@ -65,7 +65,7 b' class hgweb(object):'
65 self.maxshortchanges = int(self.config("web", "maxshortchanges", 60))
65 self.maxshortchanges = int(self.config("web", "maxshortchanges", 60))
66 self.maxfiles = int(self.config("web", "maxfiles", 10))
66 self.maxfiles = int(self.config("web", "maxfiles", 10))
67 self.allowpull = self.configbool("web", "allowpull", True)
67 self.allowpull = self.configbool("web", "allowpull", True)
68 self.encoding = self.config("web", "encoding", util._encoding)
68 self.encoding = self.config("web", "encoding", encoding.encoding)
69
69
70 def run(self):
70 def run(self):
71 if not os.environ.get('GATEWAY_INTERFACE', '').startswith("CGI/1."):
71 if not os.environ.get('GATEWAY_INTERFACE', '').startswith("CGI/1."):
@@ -8,7 +8,7 b''
8
8
9 import os
9 import os
10 from mercurial.i18n import _
10 from mercurial.i18n import _
11 from mercurial import ui, hg, util, templater, templatefilters, error
11 from mercurial import ui, hg, util, templater, templatefilters, error, encoding
12 from common import ErrorResponse, get_mtime, staticfile, style_map, paritygen,\
12 from common import ErrorResponse, get_mtime, staticfile, style_map, paritygen,\
13 get_contact, HTTP_OK, HTTP_NOT_FOUND, HTTP_SERVER_ERROR
13 get_contact, HTTP_OK, HTTP_NOT_FOUND, HTTP_SERVER_ERROR
14 from hgweb_mod import hgweb
14 from hgweb_mod import hgweb
@@ -119,7 +119,7 b' class hgwebdir(object):'
119
119
120 virtual = req.env.get("PATH_INFO", "").strip('/')
120 virtual = req.env.get("PATH_INFO", "").strip('/')
121 tmpl = self.templater(req)
121 tmpl = self.templater(req)
122 ctype = tmpl('mimetype', encoding=util._encoding)
122 ctype = tmpl('mimetype', encoding=encoding.encoding)
123 ctype = templater.stringify(ctype)
123 ctype = templater.stringify(ctype)
124
124
125 # a static file
125 # a static file
@@ -285,7 +285,7 b' class hgwebdir(object):'
285 def templater(self, req):
285 def templater(self, req):
286
286
287 def header(**map):
287 def header(**map):
288 yield tmpl('header', encoding=util._encoding, **map)
288 yield tmpl('header', encoding=encoding.encoding, **map)
289
289
290 def footer(**map):
290 def footer(**map):
291 yield tmpl("footer", **map)
291 yield tmpl("footer", **map)
@@ -7,7 +7,7 b' This software may be used and distribute'
7 of the GNU General Public License, incorporated herein by reference.
7 of the GNU General Public License, incorporated herein by reference.
8 """
8 """
9
9
10 import gettext, sys, os
10 import gettext, sys, os, encoding
11
11
12 # modelled after templater.templatepath:
12 # modelled after templater.templatepath:
13 if hasattr(sys, 'frozen'):
13 if hasattr(sys, 'frozen'):
@@ -37,15 +37,13 b' def gettext(message):'
37 if message is None:
37 if message is None:
38 return message
38 return message
39
39
40 # We cannot just run the text through util.tolocal since that
40 # We cannot just run the text through encoding.tolocal since that
41 # leads to infinite recursion when util._encoding is invalid.
41 # leads to infinite recursion when encoding.encoding is invalid.
42 try:
42 try:
43 u = t.ugettext(message)
43 u = t.ugettext(message)
44 return u.encode(util._encoding, "replace")
44 return u.encode(encoding.encoding, "replace")
45 except LookupError:
45 except LookupError:
46 return message
46 return message
47
47
48 _ = gettext
48 _ = gettext
49
49
50 # Moved after _ because of circular import.
51 import util
@@ -9,7 +9,7 b' from node import bin, hex, nullid, nullr'
9 from i18n import _
9 from i18n import _
10 import repo, changegroup
10 import repo, changegroup
11 import changelog, dirstate, filelog, manifest, context, weakref
11 import changelog, dirstate, filelog, manifest, context, weakref
12 import lock, transaction, stat, errno, ui, store
12 import lock, transaction, stat, errno, ui, store, encoding
13 import os, time, util, extensions, hook, inspect, error
13 import os, time, util, extensions, hook, inspect, error
14 import match as match_
14 import match as match_
15 import merge as merge_
15 import merge as merge_
@@ -188,7 +188,7 b' class localrepository(repo.repository):'
188 fp.write(prevtags)
188 fp.write(prevtags)
189
189
190 # committed tags are stored in UTF-8
190 # committed tags are stored in UTF-8
191 writetags(fp, names, util.fromlocal, prevtags)
191 writetags(fp, names, encoding.fromlocal, prevtags)
192
192
193 if use_dirstate and '.hgtags' not in self.dirstate:
193 if use_dirstate and '.hgtags' not in self.dirstate:
194 self.add(['.hgtags'])
194 self.add(['.hgtags'])
@@ -254,7 +254,7 b' class localrepository(repo.repository):'
254 warn(_("cannot parse entry"))
254 warn(_("cannot parse entry"))
255 continue
255 continue
256 node, key = s
256 node, key = s
257 key = util.tolocal(key.strip()) # stored in UTF-8
257 key = encoding.tolocal(key.strip()) # stored in UTF-8
258 try:
258 try:
259 bin_n = bin(node)
259 bin_n = bin(node)
260 except TypeError:
260 except TypeError:
@@ -297,7 +297,7 b' class localrepository(repo.repository):'
297 readtags(f.data().splitlines(), f, "global")
297 readtags(f.data().splitlines(), f, "global")
298
298
299 try:
299 try:
300 data = util.fromlocal(self.opener("localtags").read())
300 data = encoding.fromlocal(self.opener("localtags").read())
301 # localtags are stored in the local character set
301 # localtags are stored in the local character set
302 # while the internal tag table is stored in UTF-8
302 # while the internal tag table is stored in UTF-8
303 readtags(data.splitlines(), "localtags", "local")
303 readtags(data.splitlines(), "localtags", "local")
@@ -397,7 +397,7 b' class localrepository(repo.repository):'
397 # the branch cache is stored on disk as UTF-8, but in the local
397 # the branch cache is stored on disk as UTF-8, but in the local
398 # charset internally
398 # charset internally
399 for k, v in partial.iteritems():
399 for k, v in partial.iteritems():
400 self.branchcache[util.tolocal(k)] = v
400 self.branchcache[encoding.tolocal(k)] = v
401 return self.branchcache
401 return self.branchcache
402
402
403
403
@@ -647,7 +647,7 b' class localrepository(repo.repository):'
647 except IOError:
647 except IOError:
648 self.ui.warn(_("Named branch could not be reset, "
648 self.ui.warn(_("Named branch could not be reset, "
649 "current branch still is: %s\n")
649 "current branch still is: %s\n")
650 % util.tolocal(self.dirstate.branch()))
650 % encoding.tolocal(self.dirstate.branch()))
651 self.invalidate()
651 self.invalidate()
652 self.dirstate.invalidate()
652 self.dirstate.invalidate()
653 else:
653 else:
@@ -943,7 +943,8 b' class localrepository(repo.repository):'
943 if p2 != nullid:
943 if p2 != nullid:
944 edittext.append("HG: branch merge")
944 edittext.append("HG: branch merge")
945 if branchname:
945 if branchname:
946 edittext.append("HG: branch '%s'" % util.tolocal(branchname))
946 edittext.append("HG: branch '%s'"
947 % encoding.tolocal(branchname))
947 edittext.extend(["HG: added %s" % f for f in added])
948 edittext.extend(["HG: added %s" % f for f in added])
948 edittext.extend(["HG: changed %s" % f for f in updated])
949 edittext.extend(["HG: changed %s" % f for f in updated])
949 edittext.extend(["HG: removed %s" % f for f in removed])
950 edittext.extend(["HG: removed %s" % f for f in removed])
@@ -8,7 +8,7 b''
8 from i18n import _
8 from i18n import _
9 import os, smtplib, socket
9 import os, smtplib, socket
10 import email.Header, email.MIMEText, email.Utils
10 import email.Header, email.MIMEText, email.Utils
11 import util
11 import util, encoding
12
12
13 def _smtp(ui):
13 def _smtp(ui):
14 '''build an smtp connection and return a function to send mail'''
14 '''build an smtp connection and return a function to send mail'''
@@ -100,8 +100,8 b" def mimetextpatch(s, subtype='plain', di"
100 def _charsets(ui):
100 def _charsets(ui):
101 '''Obtains charsets to send mail parts not containing patches.'''
101 '''Obtains charsets to send mail parts not containing patches.'''
102 charsets = [cs.lower() for cs in ui.configlist('email', 'charsets')]
102 charsets = [cs.lower() for cs in ui.configlist('email', 'charsets')]
103 fallbacks = [util._fallbackencoding.lower(),
103 fallbacks = [encoding.fallbackencoding.lower(),
104 util._encoding.lower(), 'utf-8']
104 encoding.encoding.lower(), 'utf-8']
105 for cs in fallbacks: # util.unique does not keep order
105 for cs in fallbacks: # util.unique does not keep order
106 if cs not in charsets:
106 if cs not in charsets:
107 charsets.append(cs)
107 charsets.append(cs)
@@ -110,14 +110,14 b' def _charsets(ui):'
110 def _encode(ui, s, charsets):
110 def _encode(ui, s, charsets):
111 '''Returns (converted) string, charset tuple.
111 '''Returns (converted) string, charset tuple.
112 Finds out best charset by cycling through sendcharsets in descending
112 Finds out best charset by cycling through sendcharsets in descending
113 order. Tries both _encoding and _fallbackencoding for input. Only as
113 order. Tries both encoding and fallbackencoding for input. Only as
114 last resort send as is in fake ascii.
114 last resort send as is in fake ascii.
115 Caveat: Do not use for mail parts containing patches!'''
115 Caveat: Do not use for mail parts containing patches!'''
116 try:
116 try:
117 s.decode('ascii')
117 s.decode('ascii')
118 except UnicodeDecodeError:
118 except UnicodeDecodeError:
119 sendcharsets = charsets or _charsets(ui)
119 sendcharsets = charsets or _charsets(ui)
120 for ics in (util._encoding, util._fallbackencoding):
120 for ics in (encoding.encoding, encoding.fallbackencoding):
121 try:
121 try:
122 u = s.decode(ics)
122 u = s.decode(ics)
123 except UnicodeDecodeError:
123 except UnicodeDecodeError:
@@ -6,7 +6,7 b''
6 # of the GNU General Public License, incorporated herein by reference.
6 # of the GNU General Public License, incorporated herein by reference.
7
7
8 import cgi, re, os, time, urllib, textwrap
8 import cgi, re, os, time, urllib, textwrap
9 import util, templater
9 import util, templater, encoding
10
10
11 agescales = [("second", 1),
11 agescales = [("second", 1),
12 ("minute", 60),
12 ("minute", 60),
@@ -76,7 +76,7 b' def nl2br(text):'
76 return text.replace('\n', '<br/>\n')
76 return text.replace('\n', '<br/>\n')
77
77
78 def obfuscate(text):
78 def obfuscate(text):
79 text = unicode(text, util._encoding, 'replace')
79 text = unicode(text, encoding.encoding, 'replace')
80 return ''.join(['&#%d;' % ord(c) for c in text])
80 return ''.join(['&#%d;' % ord(c) for c in text])
81
81
82 def domain(author):
82 def domain(author):
@@ -14,8 +14,8 b' platform-specific details from the core.'
14
14
15 from i18n import _
15 from i18n import _
16 import cStringIO, errno, re, shutil, sys, tempfile, traceback, error
16 import cStringIO, errno, re, shutil, sys, tempfile, traceback, error
17 import os, stat, threading, time, calendar, ConfigParser, locale, glob, osutil
17 import os, stat, threading, time, calendar, ConfigParser, glob, osutil
18 import imp, unicodedata
18 import imp
19
19
20 # Python compatibility
20 # Python compatibility
21
21
@@ -81,71 +81,6 b' except ImportError:'
81 popen3 = os.popen3
81 popen3 = os.popen3
82
82
83
83
84 _encodingfixup = {'646': 'ascii', 'ANSI_X3.4-1968': 'ascii'}
85
86 try:
87 _encoding = os.environ.get("HGENCODING")
88 if sys.platform == 'darwin' and not _encoding:
89 # On darwin, getpreferredencoding ignores the locale environment and
90 # always returns mac-roman. We override this if the environment is
91 # not C (has been customized by the user).
92 locale.setlocale(locale.LC_CTYPE, '')
93 _encoding = locale.getlocale()[1]
94 if not _encoding:
95 _encoding = locale.getpreferredencoding() or 'ascii'
96 _encoding = _encodingfixup.get(_encoding, _encoding)
97 except locale.Error:
98 _encoding = 'ascii'
99 _encodingmode = os.environ.get("HGENCODINGMODE", "strict")
100 _fallbackencoding = 'ISO-8859-1'
101
102 def tolocal(s):
103 """
104 Convert a string from internal UTF-8 to local encoding
105
106 All internal strings should be UTF-8 but some repos before the
107 implementation of locale support may contain latin1 or possibly
108 other character sets. We attempt to decode everything strictly
109 using UTF-8, then Latin-1, and failing that, we use UTF-8 and
110 replace unknown characters.
111 """
112 for e in ('UTF-8', _fallbackencoding):
113 try:
114 u = s.decode(e) # attempt strict decoding
115 return u.encode(_encoding, "replace")
116 except LookupError, k:
117 raise Abort(_("%s, please check your locale settings") % k)
118 except UnicodeDecodeError:
119 pass
120 u = s.decode("utf-8", "replace") # last ditch
121 return u.encode(_encoding, "replace")
122
123 def fromlocal(s):
124 """
125 Convert a string from the local character encoding to UTF-8
126
127 We attempt to decode strings using the encoding mode set by
128 HGENCODINGMODE, which defaults to 'strict'. In this mode, unknown
129 characters will cause an error message. Other modes include
130 'replace', which replaces unknown characters with a special
131 Unicode character, and 'ignore', which drops the character.
132 """
133 try:
134 return s.decode(_encoding, _encodingmode).encode("utf-8")
135 except UnicodeDecodeError, inst:
136 sub = s[max(0, inst.start-10):inst.start+10]
137 raise Abort("decoding near '%s': %s!" % (sub, inst))
138 except LookupError, k:
139 raise Abort(_("%s, please check your locale settings") % k)
140
141 def colwidth(s):
142 "Find the column width of a UTF-8 string for display"
143 d = s.decode(_encoding, 'replace')
144 if hasattr(unicodedata, 'east_asian_width'):
145 w = unicodedata.east_asian_width
146 return sum([w(c) in 'WF' and 2 or 1 for c in d])
147 return len(d)
148
149 def version():
84 def version():
150 """Return version information if available."""
85 """Return version information if available."""
151 try:
86 try:
@@ -16,7 +16,7 b' import win32api'
16
16
17 import errno, os, sys, pywintypes, win32con, win32file, win32process
17 import errno, os, sys, pywintypes, win32con, win32file, win32process
18 import cStringIO, winerror
18 import cStringIO, winerror
19 import osutil
19 import osutil, encoding
20 import util
20 import util
21 from win32com.shell import shell,shellcon
21 from win32com.shell import shell,shellcon
22
22
@@ -213,7 +213,7 b' def lookup_reg(key, valname=None, scope='
213 try:
213 try:
214 val = QueryValueEx(OpenKey(s, key), valname)[0]
214 val = QueryValueEx(OpenKey(s, key), valname)[0]
215 # never let a Unicode string escape into the wild
215 # never let a Unicode string escape into the wild
216 return util.tolocal(val.encode('UTF-8'))
216 return encoding.tolocal(val.encode('UTF-8'))
217 except EnvironmentError:
217 except EnvironmentError:
218 pass
218 pass
219
219
General Comments 0
You need to be logged in to leave comments. Login now