upstream/mercurial-mirror Files · mercurial/i18n.py

diff: do not concatenate immutable bytes while building a/b bodies (issue6445)...

diff: do not concatenate immutable bytes while building a/b bodies (issue6445)

Use bytearray instead. I don't know what's changed since Python 2, but bytes
concatenation is 100x slower on Python 3.

  % python2.7 -m timeit -s "s = b''" "for i in range(10000): s += b'line'"
  1000 loops, best of 3: 321 usec per loop
  % python3.9 -m timeit -s "s = b''" "for i in range(10000): s += b'line'"
  5 loops, best of 5: 39.2 msec per loop

Benchmark using tailwind.css (measuring the fast path, a is empty):

  % HGRCPATH=/dev/null python2.7 ./hg log -R /tmp/issue6445 -p --time \
  --color=always --config diff.word-diff=true >/dev/null
  (prev) time: real 1.580 secs (user 1.560+0.000 sys 0.020+0.000)
  (this) time: real 1.610 secs (user 1.570+0.000 sys 0.030+0.000)
  % HGRCPATH=/dev/null python3.9 ./hg log -R /tmp/issue6445 -p --time \
  --color=always --config diff.word-diff=true >/dev/null
  (prev) time: real 114.500 secs (user 114.460+0.000 sys 0.030+0.000)
  (this) time: real 2.180 secs (user 2.140+0.000 sys 0.040+0.000)

Benchmark using random tabular text data (not the fast path):

  % dd if=/dev/urandom bs=1k count=1000 | hexdump -v -e '16/1 "%3u," "\n"' > ttf
  % hg ci -ma
  % dd if=/dev/urandom bs=1k count=1000 | hexdump -v -e '16/1 "%3u," "\n"' > ttf
  % hg ci -mb
  % HGRCPATH=/dev/null python2.7 ./hg log -R /tmp/issue6445 -p --time \
  --color=always --config diff.word-diff=true >/dev/null
  (prev) time: real 3.240 secs (user 3.040+0.000 sys 0.200+0.000)
  (this) time: real 3.230 secs (user 3.070+0.000 sys 0.160+0.000)
  % HGRCPATH=/dev/null python3.9 ./hg log -R /tmp/issue6445 -p --time \
  --color=always --config diff.word-diff=true >/dev/null
  (prev) time: real 44.130 secs (user 43.850+0.000 sys 0.270+0.000)
  (this) time: real 4.170 secs (user 3.850+0.000 sys 0.310+0.000)

Martin von Zweigbergk - - Load All Authors

File last commit:

r44069:f0bee3b1 default


                r46624:210f9b8d

stable

Download file

             i18n.py
        
                    115 lines
            
             | 3.8 KiB
            
                | text/x-python
            
             |
                PythonLexer
            
             / mercurial / i18n.py
          
                    History
                
                 |
                  Annotation
                 | Raw
                 |Copy content
                 |Copy permalink

      # i18n.py - internationalization support for mercurial

      #

      # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>

      #

      # This software may be used and distributed according to the terms of the

      # GNU General Public License version 2 or any later version.

      from __future__ import absolute_import

      import gettext as gettextmod

      import locale

      import os

      import sys

      from .pycompat import getattr

      from .utils import resourceutil

      from . import (

          encoding,

          pycompat,

      )

      # Modelled after templater.templatepath: use the executable path when
      # sys.frozen is set, otherwise the (filesystem-encoded) path of this file.
      module = (
          pycompat.sysexecutable
          if getattr(sys, 'frozen', None) is not None
          else pycompat.fsencode(__file__)
      )

      # Languages handed to gettext; None lets gettext consult the locale
      # environment variables on its own.
      _languages = None
      if pycompat.iswindows and not any(
          name in encoding.environ
          for name in (b'LANGUAGE', b'LC_ALL', b'LC_MESSAGES', b'LANG')
      ):
          # No locale variable is set, so try to detect the UI language through
          # the "User Interface Language Management" API. Note that
          # locale.getdefaultlocale() uses GetLocaleInfo(), which may be
          # different from the UI language.
          # (See http://msdn.microsoft.com/en-us/library/dd374098(v=VS.85).aspx )
          try:
              import ctypes

              langid = ctypes.windll.kernel32.GetUserDefaultUILanguage()
              _languages = [locale.windows_locale[langid]]
          except (ImportError, AttributeError, KeyError):
              # ctypes not found or unknown langid: keep _languages as None
              pass

      # Load the 'hg' message catalog from <datapath>/locale; fallback=True
      # means a missing catalog does not raise.
      datapath = pycompat.fsdecode(resourceutil.datapath)
      localedir = os.path.join(datapath, 'locale')
      t = gettextmod.translation('hg', localedir, _languages, fallback=True)

      # Use the translation object's ugettext when it has one, else gettext.
      try:
          _ugettext = t.ugettext
      except AttributeError:
          _ugettext = t.gettext

      # Translation cache, keyed by encoding: {encoding: {message: translation}}
      _msgcache = {}

      def gettext(message):
          """Return the translation of message, encoded in the local encoding.

          Each paragraph of the message (paragraphs are separated by a blank
          line) is looked up in the catalog to get a Unicode string; the
          joined result is encoded in the local encoding before being
          returned. Results are cached per encoding. If the local encoding is
          unknown, the untranslated message is cached and returned instead.

          Important: message is restricted to characters in the encoding
          given by sys.getdefaultencoding() which is most likely 'ascii'.
          """
          # t.ugettext would turn None into u'None', whereas our callers
          # expect None to be handed back unchanged.
          if message is None or not _ugettext:
              return message

          translations = _msgcache.setdefault(encoding.encoding, {})
          if message in translations:
              return translations[message]

          if type(message) is pycompat.unicode:
              # goofy unicode docstrings in test
              paragraphs = message.split(u'\n\n')
          else:
              # should be ascii, but we have unicode docstrings in test, which
              # are converted to utf-8 bytes on Python 3.
              paragraphs = [p.decode("utf-8") for p in message.split(b'\n\n')]

          # Never translate the empty string -- it holds the meta data of the
          # .po file.
          translated = u'\n\n'.join(
              [(_ugettext(p) or u'') if p else u'' for p in paragraphs]
          )
          try:
              # encoding.tolocal cannot be used since it will first try to
              # decode the Unicode string. Calling u.decode(enc) really
              # means u.encode(sys.getdefaultencoding()).decode(enc). Since
              # the Python encoding defaults to 'ascii', this fails if the
              # translated string uses non-ASCII characters.
              encodingstr = pycompat.sysstr(encoding.encoding)
              translations[message] = translated.encode(encodingstr, "replace")
          except LookupError:
              # An unknown encoding results in a LookupError.
              translations[message] = message
          return translations[message]

      def _plain():
          """Tell whether plain mode applies to i18n.

          Returns False when neither HGPLAIN nor HGPLAINEXCEPT is present in
          encoding.environ. Otherwise returns True unless b'i18n' is one of the
          comma-separated entries of HGPLAINEXCEPT.
          """
          env = encoding.environ
          if b'HGPLAIN' in env or b'HGPLAINEXCEPT' in env:
              exceptions = env.get(b'HGPLAINEXCEPT', b'').strip().split(b',')
              return b'i18n' not in exceptions
          return False

      # `_` is the translation entry point: the identity function when
      # _plain() is true, gettext otherwise.
      _ = (lambda message: message) if _plain() else gettext

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

				# i18n.py - internationalization support for mercurial
				#
				# Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
				#
				# This software may be used and distributed according to the terms of the
				# GNU General Public License version 2 or any later version.

				from __future__ import absolute_import

				import gettext as gettextmod
				import locale
				import os
				import sys

				from .pycompat import getattr
				from .utils import resourceutil
				from . import (
				encoding,
				pycompat,
				)

				# modelled after templater.templatepath: use the executable path when
				# sys.frozen is set, otherwise the path of this file.
				if getattr(sys, 'frozen', None) is not None:
				    module = pycompat.sysexecutable
				else:
				    module = pycompat.fsencode(__file__)

				# Languages handed to gettext; None lets gettext consult the locale
				# environment variables on its own.
				_languages = None
				if (
				    pycompat.iswindows
				    and b'LANGUAGE' not in encoding.environ
				    and b'LC_ALL' not in encoding.environ
				    and b'LC_MESSAGES' not in encoding.environ
				    and b'LANG' not in encoding.environ
				):
				    # Try to detect UI language by "User Interface Language Management"
				    # API if no locale variables are set. Note that
				    # locale.getdefaultlocale() uses GetLocaleInfo(), which may be
				    # different from UI language.
				    # (See http://msdn.microsoft.com/en-us/library/dd374098(v=VS.85).aspx )
				    try:
				        import ctypes

				        langid = ctypes.windll.kernel32.GetUserDefaultUILanguage()
				        _languages = [locale.windows_locale[langid]]
				    except (ImportError, AttributeError, KeyError):
				        # ctypes not found or unknown langid
				        pass


				# Load the 'hg' message catalog from <datapath>/locale; fallback=True
				# means a missing catalog does not raise.
				datapath = pycompat.fsdecode(resourceutil.datapath)
				localedir = os.path.join(datapath, 'locale')
				t = gettextmod.translation('hg', localedir, _languages, fallback=True)
				# Use the translation object's ugettext when it has one, else gettext.
				try:
				    _ugettext = t.ugettext
				except AttributeError:
				    _ugettext = t.gettext


				_msgcache = {}  # encoding: {message: translation}


				def gettext(message):
				    """Translate message.

				    The message is looked up in the catalog, paragraph by paragraph, to
				    get a Unicode string, which is encoded in the local encoding before
				    being returned. Results are cached per encoding. If the local
				    encoding is unknown, the untranslated message is returned.

				    Important: message is restricted to characters in the encoding
				    given by sys.getdefaultencoding() which is most likely 'ascii'.
				    """
				    # If message is None, t.ugettext will return u'None' as the
				    # translation whereas our callers expect us to return None.
				    if message is None or not _ugettext:
				        return message

				    cache = _msgcache.setdefault(encoding.encoding, {})
				    if message not in cache:
				        if type(message) is pycompat.unicode:
				            # goofy unicode docstrings in test
				            paragraphs = message.split(u'\n\n')
				        else:
				            # should be ascii, but we have unicode docstrings in test,
				            # which are converted to utf-8 bytes on Python 3.
				            paragraphs = [
				                p.decode("utf-8") for p in message.split(b'\n\n')
				            ]
				        # Be careful not to translate the empty string -- it holds the
				        # meta data of the .po file.
				        u = u'\n\n'.join(
				            [_ugettext(p) if p else u'' for p in paragraphs]
				        )
				        try:
				            # encoding.tolocal cannot be used since it will first try to
				            # decode the Unicode string. Calling u.decode(enc) really
				            # means u.encode(sys.getdefaultencoding()).decode(enc). Since
				            # the Python encoding defaults to 'ascii', this fails if the
				            # translated string uses non-ASCII characters.
				            encodingstr = pycompat.sysstr(encoding.encoding)
				            cache[message] = u.encode(encodingstr, "replace")
				        except LookupError:
				            # An unknown encoding results in a LookupError.
				            cache[message] = message
				    return cache[message]


				def _plain():
				    """Tell whether plain mode applies to i18n.

				    Returns False when neither HGPLAIN nor HGPLAINEXCEPT is present in
				    encoding.environ. Otherwise returns True unless b'i18n' is one of
				    the comma-separated entries of HGPLAINEXCEPT.
				    """
				    if (
				        b'HGPLAIN' not in encoding.environ
				        and b'HGPLAINEXCEPT' not in encoding.environ
				    ):
				        return False
				    exceptions = (
				        encoding.environ.get(b'HGPLAINEXCEPT', b'').strip().split(b',')
				    )
				    return b'i18n' not in exceptions


				# `_` is the translation entry point: the identity function when
				# _plain() is true, gettext otherwise.
				if _plain():
				    _ = lambda message: message
				else:
				    _ = gettext