diff --git a/hgext/convert/subversion.py b/hgext/convert/subversion.py --- a/hgext/convert/subversion.py +++ b/hgext/convert/subversion.py @@ -187,13 +187,14 @@ def debugsvnlog(ui, **opts): """Fetch SVN log in a subprocess and channel them back to parent to avoid memory collection issues. """ - if svn is None: - raise error.Abort( - _(b'debugsvnlog could not load Subversion python bindings') - ) + with util.with_lc_ctype(): + if svn is None: + raise error.Abort( + _(b'debugsvnlog could not load Subversion python bindings') + ) - args = decodeargs(ui.fin.read()) - get_log_child(ui.fout, *args) + args = decodeargs(ui.fin.read()) + get_log_child(ui.fout, *args) class logstream(object): @@ -420,18 +421,19 @@ class svn_source(converter_source): self.url = geturl(url) self.encoding = b'UTF-8' # Subversion is always nominal UTF-8 try: - self.transport = transport.SvnRaTransport(url=self.url) - self.ra = self.transport.ra - self.ctx = self.transport.client - self.baseurl = svn.ra.get_repos_root(self.ra) - # Module is either empty or a repository path starting with - # a slash and not ending with a slash. - self.module = urlreq.unquote(self.url[len(self.baseurl) :]) - self.prevmodule = None - self.rootmodule = self.module - self.commits = {} - self.paths = {} - self.uuid = svn.ra.get_uuid(self.ra) + with util.with_lc_ctype(): + self.transport = transport.SvnRaTransport(url=self.url) + self.ra = self.transport.ra + self.ctx = self.transport.client + self.baseurl = svn.ra.get_repos_root(self.ra) + # Module is either empty or a repository path starting with + # a slash and not ending with a slash. + self.module = urlreq.unquote(self.url[len(self.baseurl) :]) + self.prevmodule = None + self.rootmodule = self.module + self.commits = {} + self.paths = {} + self.uuid = svn.ra.get_uuid(self.ra) except svn.core.SubversionException: ui.traceback() svnversion = b'%d.%d.%d' % ( @@ -477,7 +479,8 @@ class svn_source(converter_source): ) try: - self.head = self.latest(self.module, latest) + with util.with_lc_ctype(): + self.head = self.latest(self.module, latest) except SvnPathNotFound: self.head = None if not self.head: @@ -494,6 +497,13 @@ class svn_source(converter_source): self.wc = None self.convertfp = None + def before(self): + self.with_lc_ctype = util.with_lc_ctype() + self.with_lc_ctype.__enter__() + + def after(self): + self.with_lc_ctype.__exit__(None, None, None) + def setrevmap(self, revmap): lastrevs = {} for revid in revmap: diff --git a/hgext/histedit.py b/hgext/histedit.py --- a/hgext/histedit.py +++ b/hgext/histedit.py @@ -201,7 +201,6 @@ except ImportError: termios = None import functools -import locale import os import struct @@ -1711,11 +1710,8 @@ def _chistedit(ui, repo, freeargs, opts) ctxs = [] for i, r in enumerate(revs): ctxs.append(histeditrule(ui, repo[r], i)) - # Curses requires setting the locale or it will default to the C - # locale. This sets the locale to the user's default system - # locale. - locale.setlocale(locale.LC_ALL, '') - rc = curses.wrapper(functools.partial(_chisteditmain, repo, ctxs)) + with util.with_lc_ctype(): + rc = curses.wrapper(functools.partial(_chisteditmain, repo, ctxs)) curses.echo() curses.endwin() if rc is False: diff --git a/mercurial/crecord.py b/mercurial/crecord.py --- a/mercurial/crecord.py +++ b/mercurial/crecord.py @@ -10,7 +10,6 @@ from __future__ import absolute_import -import locale import os import re import signal @@ -566,14 +565,12 @@ def chunkselector(ui, headerlist, operat """ ui.write(_(b'starting interactive selection\n')) chunkselector = curseschunkselector(headerlist, ui, operation) - # This is required for ncurses to display non-ASCII characters in - # default user locale encoding correctly. --immerrr - locale.setlocale(locale.LC_ALL, '') origsigtstp = sentinel = object() if util.safehasattr(signal, b'SIGTSTP'): origsigtstp = signal.getsignal(signal.SIGTSTP) try: - curses.wrapper(chunkselector.main) + with util.with_lc_ctype(): + curses.wrapper(chunkselector.main) if chunkselector.initexc is not None: raise chunkselector.initexc # ncurses does not restore signal handler for SIGTSTP diff --git a/mercurial/util.py b/mercurial/util.py --- a/mercurial/util.py +++ b/mercurial/util.py @@ -22,6 +22,7 @@ import errno import gc import hashlib import itertools +import locale import mmap import os import platform as pyplatform @@ -3596,3 +3597,32 @@ def uvarintdecodestream(fh): if not (byte & 0x80): return result shift += 7 + + +# Passing the '' locale means that the locale should be set according to the +# user settings (environment variables). +# Python sometimes avoids setting the global locale settings. When interfacing +# with C code (e.g. the curses module or the Subversion bindings), the global +# locale settings must be initialized correctly. Python 2 does not initialize +# the global locale settings on interpreter startup. Python 3 sometimes +# initializes LC_CTYPE, but not consistently at least on Windows. Therefore we +# explicitly initialize it to get consistent behavior if it's not already +# initialized. Since CPython commit 177d921c8c03d30daa32994362023f777624b10d, +# LC_CTYPE is always initialized. If we require Python 3.8+, we should re-check +# if we can remove this code. +@contextlib.contextmanager +def with_lc_ctype(): + oldloc = locale.setlocale(locale.LC_CTYPE, None) + if oldloc == 'C': + try: + try: + locale.setlocale(locale.LC_CTYPE, '') + except locale.Error: + # The likely case is that the locale from the environment + # variables is unknown. + pass + yield + finally: + locale.setlocale(locale.LC_CTYPE, oldloc) + else: + yield diff --git a/tests/run-tests.py b/tests/run-tests.py --- a/tests/run-tests.py +++ b/tests/run-tests.py @@ -2069,7 +2069,7 @@ class TTest(Test): if el.endswith(b" (esc)\n"): if PYTHON3: el = el[:-7].decode('unicode_escape') + '\n' - el = el.encode('utf-8') + el = el.encode('latin-1') else: el = el[:-7].decode('string-escape') + '\n' if el == l or os.name == 'nt' and el[:-1] + b'\r\n' == l: diff --git a/tests/test-unified-test.t b/tests/test-unified-test.t --- a/tests/test-unified-test.t +++ b/tests/test-unified-test.t @@ -75,6 +75,16 @@ Windows: \r\n is handled like \n and can crlf\r (esc) #endif +Escapes: + + $ $PYTHON -c 'from mercurial.utils.procutil import stdout; stdout.write(b"\xff")' + \xff (no-eol) (esc) + +Escapes with conditions: + + $ $PYTHON -c 'from mercurial.utils.procutil import stdout; stdout.write(b"\xff")' + \xff (no-eol) (esc) (true !) + Combining esc with other markups - and handling lines ending with \r instead of \n: $ printf 'foo/bar\r'