##// END OF EJS Templates
i18n: cache translated messages per encoding...
Yuya Nishihara -
r34661:d00ec62d default
parent child Browse files
Show More
@@ -1,109 +1,110
1 # i18n.py - internationalization support for mercurial
1 # i18n.py - internationalization support for mercurial
2 #
2 #
3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import gettext as gettextmod
10 import gettext as gettextmod
11 import locale
11 import locale
12 import os
12 import os
13 import sys
13 import sys
14
14
15 from . import (
15 from . import (
16 encoding,
16 encoding,
17 pycompat,
17 pycompat,
18 )
18 )
19
19
20 # modelled after templater.templatepath:
20 # modelled after templater.templatepath:
21 if getattr(sys, 'frozen', None) is not None:
21 if getattr(sys, 'frozen', None) is not None:
22 module = pycompat.sysexecutable
22 module = pycompat.sysexecutable
23 else:
23 else:
24 module = pycompat.fsencode(__file__)
24 module = pycompat.fsencode(__file__)
25
25
26 try:
26 try:
27 unicode
27 unicode
28 except NameError:
28 except NameError:
29 unicode = str
29 unicode = str
30
30
31 _languages = None
31 _languages = None
32 if (pycompat.iswindows
32 if (pycompat.iswindows
33 and 'LANGUAGE' not in encoding.environ
33 and 'LANGUAGE' not in encoding.environ
34 and 'LC_ALL' not in encoding.environ
34 and 'LC_ALL' not in encoding.environ
35 and 'LC_MESSAGES' not in encoding.environ
35 and 'LC_MESSAGES' not in encoding.environ
36 and 'LANG' not in encoding.environ):
36 and 'LANG' not in encoding.environ):
37 # Try to detect UI language by "User Interface Language Management" API
37 # Try to detect UI language by "User Interface Language Management" API
38 # if no locale variables are set. Note that locale.getdefaultlocale()
38 # if no locale variables are set. Note that locale.getdefaultlocale()
39 # uses GetLocaleInfo(), which may be different from UI language.
39 # uses GetLocaleInfo(), which may be different from UI language.
40 # (See http://msdn.microsoft.com/en-us/library/dd374098(v=VS.85).aspx )
40 # (See http://msdn.microsoft.com/en-us/library/dd374098(v=VS.85).aspx )
41 try:
41 try:
42 import ctypes
42 import ctypes
43 langid = ctypes.windll.kernel32.GetUserDefaultUILanguage()
43 langid = ctypes.windll.kernel32.GetUserDefaultUILanguage()
44 _languages = [locale.windows_locale[langid]]
44 _languages = [locale.windows_locale[langid]]
45 except (ImportError, AttributeError, KeyError):
45 except (ImportError, AttributeError, KeyError):
46 # ctypes not found or unknown langid
46 # ctypes not found or unknown langid
47 pass
47 pass
48
48
49 _ugettext = None
49 _ugettext = None
50
50
51 def setdatapath(datapath):
51 def setdatapath(datapath):
52 datapath = pycompat.fsdecode(datapath)
52 datapath = pycompat.fsdecode(datapath)
53 localedir = os.path.join(datapath, pycompat.sysstr('locale'))
53 localedir = os.path.join(datapath, pycompat.sysstr('locale'))
54 t = gettextmod.translation('hg', localedir, _languages, fallback=True)
54 t = gettextmod.translation('hg', localedir, _languages, fallback=True)
55 global _ugettext
55 global _ugettext
56 try:
56 try:
57 _ugettext = t.ugettext
57 _ugettext = t.ugettext
58 except AttributeError:
58 except AttributeError:
59 _ugettext = t.gettext
59 _ugettext = t.gettext
60
60
61 _msgcache = {}
61 _msgcache = {} # encoding: {message: translation}
62
62
63 def gettext(message):
63 def gettext(message):
64 """Translate message.
64 """Translate message.
65
65
66 The message is looked up in the catalog to get a Unicode string,
66 The message is looked up in the catalog to get a Unicode string,
67 which is encoded in the local encoding before being returned.
67 which is encoded in the local encoding before being returned.
68
68
69 Important: message is restricted to characters in the encoding
69 Important: message is restricted to characters in the encoding
70 given by sys.getdefaultencoding() which is most likely 'ascii'.
70 given by sys.getdefaultencoding() which is most likely 'ascii'.
71 """
71 """
72 # If message is None, t.ugettext will return u'None' as the
72 # If message is None, t.ugettext will return u'None' as the
73 # translation whereas our callers expect us to return None.
73 # translation whereas our callers expect us to return None.
74 if message is None or not _ugettext:
74 if message is None or not _ugettext:
75 return message
75 return message
76
76
77 if message not in _msgcache:
77 cache = _msgcache.setdefault(encoding.encoding, {})
78 if message not in cache:
78 if type(message) is unicode:
79 if type(message) is unicode:
79 # goofy unicode docstrings in test
80 # goofy unicode docstrings in test
80 paragraphs = message.split(u'\n\n')
81 paragraphs = message.split(u'\n\n')
81 else:
82 else:
82 paragraphs = [p.decode("ascii") for p in message.split('\n\n')]
83 paragraphs = [p.decode("ascii") for p in message.split('\n\n')]
83 # Be careful not to translate the empty string -- it holds the
84 # Be careful not to translate the empty string -- it holds the
84 # meta data of the .po file.
85 # meta data of the .po file.
85 u = u'\n\n'.join([p and _ugettext(p) or u'' for p in paragraphs])
86 u = u'\n\n'.join([p and _ugettext(p) or u'' for p in paragraphs])
86 try:
87 try:
87 # encoding.tolocal cannot be used since it will first try to
88 # encoding.tolocal cannot be used since it will first try to
88 # decode the Unicode string. Calling u.decode(enc) really
89 # decode the Unicode string. Calling u.decode(enc) really
89 # means u.encode(sys.getdefaultencoding()).decode(enc). Since
90 # means u.encode(sys.getdefaultencoding()).decode(enc). Since
90 # the Python encoding defaults to 'ascii', this fails if the
91 # the Python encoding defaults to 'ascii', this fails if the
91 # translated string uses non-ASCII characters.
92 # translated string uses non-ASCII characters.
92 encodingstr = pycompat.sysstr(encoding.encoding)
93 encodingstr = pycompat.sysstr(encoding.encoding)
93 _msgcache[message] = u.encode(encodingstr, "replace")
94 cache[message] = u.encode(encodingstr, "replace")
94 except LookupError:
95 except LookupError:
95 # An unknown encoding results in a LookupError.
96 # An unknown encoding results in a LookupError.
96 _msgcache[message] = message
97 cache[message] = message
97 return _msgcache[message]
98 return cache[message]
98
99
99 def _plain():
100 def _plain():
100 if ('HGPLAIN' not in encoding.environ
101 if ('HGPLAIN' not in encoding.environ
101 and 'HGPLAINEXCEPT' not in encoding.environ):
102 and 'HGPLAINEXCEPT' not in encoding.environ):
102 return False
103 return False
103 exceptions = encoding.environ.get('HGPLAINEXCEPT', '').strip().split(',')
104 exceptions = encoding.environ.get('HGPLAINEXCEPT', '').strip().split(',')
104 return 'i18n' not in exceptions
105 return 'i18n' not in exceptions
105
106
106 if _plain():
107 if _plain():
107 _ = lambda message: message
108 _ = lambda message: message
108 else:
109 else:
109 _ = gettext
110 _ = gettext
@@ -1,50 +1,70
1 (Translations are optional)
1 (Translations are optional)
2
2
3 #if gettext no-outer-repo
3 #if gettext no-outer-repo
4
4
5 Test that translations are compiled and installed correctly.
5 Test that translations are compiled and installed correctly.
6
6
7 Default encoding in tests is "ascii" and the translation is encoded
7 Default encoding in tests is "ascii" and the translation is encoded
8 using the "replace" error handler:
8 using the "replace" error handler:
9
9
10 $ LANGUAGE=pt_BR hg tip
10 $ LANGUAGE=pt_BR hg tip
11 abortado: n?o foi encontrado um reposit?rio em '$TESTTMP' (.hg n?o encontrado)!
11 abortado: n?o foi encontrado um reposit?rio em '$TESTTMP' (.hg n?o encontrado)!
12 [255]
12 [255]
13
13
14 Using a more accommodating encoding:
14 Using a more accommodating encoding:
15
15
16 $ HGENCODING=UTF-8 LANGUAGE=pt_BR hg tip
16 $ HGENCODING=UTF-8 LANGUAGE=pt_BR hg tip
17 abortado: n\xc3\xa3o foi encontrado um reposit\xc3\xb3rio em '$TESTTMP' (.hg n\xc3\xa3o encontrado)! (esc)
17 abortado: n\xc3\xa3o foi encontrado um reposit\xc3\xb3rio em '$TESTTMP' (.hg n\xc3\xa3o encontrado)! (esc)
18 [255]
18 [255]
19
19
20 Different encoding:
20 Different encoding:
21
21
22 $ HGENCODING=Latin-1 LANGUAGE=pt_BR hg tip
22 $ HGENCODING=Latin-1 LANGUAGE=pt_BR hg tip
23 abortado: n\xe3o foi encontrado um reposit\xf3rio em '$TESTTMP' (.hg n\xe3o encontrado)! (esc)
23 abortado: n\xe3o foi encontrado um reposit\xf3rio em '$TESTTMP' (.hg n\xe3o encontrado)! (esc)
24 [255]
24 [255]
25
25
26 #endif
26 #endif
27
27
28 #if gettext
28 #if gettext
29
29
30 Test keyword search in translated help text:
30 Test keyword search in translated help text:
31
31
32 $ HGENCODING=UTF-8 LANGUAGE=de hg help -k Aktualisiert
32 $ HGENCODING=UTF-8 LANGUAGE=de hg help -k Aktualisiert
33 Themen:
33 Themen:
34
34
35 subrepos Unterarchive
35 subrepos Unterarchive
36
36
37 Befehle:
37 Befehle:
38
38
39 pull Ruft \xc3\x84nderungen von der angegebenen Quelle ab (esc)
39 pull Ruft \xc3\x84nderungen von der angegebenen Quelle ab (esc)
40 update Aktualisiert das Arbeitsverzeichnis (oder wechselt die Version)
40 update Aktualisiert das Arbeitsverzeichnis (oder wechselt die Version)
41
41
42 #endif
42 #endif
43
43
44 Check for Mercurial-specific translation problems in each *.po file, and
44 Check for Mercurial-specific translation problems in each *.po file, and
45 check the tool itself by doctest
45 check the tool itself by doctest
46
46
47 $ cd "$TESTDIR"/../i18n
47 $ cd "$TESTDIR"/../i18n
48 $ $PYTHON check-translation.py *.po
48 $ $PYTHON check-translation.py *.po
49 $ $PYTHON check-translation.py --doctest
49 $ $PYTHON check-translation.py --doctest
50 $ cd $TESTTMP
50 $ cd $TESTTMP
51
52 Check i18n cache isn't reused after encoding change:
53
54 $ cat > $TESTTMP/encodingchange.py << EOF
55 > from mercurial import encoding, registrar
56 > from mercurial.i18n import _
57 > cmdtable = {}
58 > command = registrar.command(cmdtable)
59 > @command(b'encodingchange', norepo=True)
60 > def encodingchange(ui):
61 > for encode in (b'ascii', b'UTF-8', b'ascii', b'UTF-8'):
62 > encoding.encoding = encode
63 > ui.write(b'%s\n' % _(b'(EXPERIMENTAL)'))
64 > EOF
65
66 $ LANGUAGE=ja hg --config extensions.encodingchange=$TESTTMP/encodingchange.py encodingchange
67 (?????)
68 (\xe5\xae\x9f\xe9\xa8\x93\xe7\x9a\x84\xe5\xae\x9f\xe8\xa3\x85) (esc)
69 (?????)
70 (\xe5\xae\x9f\xe9\xa8\x93\xe7\x9a\x84\xe5\xae\x9f\xe8\xa3\x85) (esc)
General Comments 0
You need to be logged in to leave comments. Login now