diff --git a/hgext/highlight/highlight.py b/hgext/highlight/highlight.py --- a/hgext/highlight/highlight.py +++ b/hgext/highlight/highlight.py @@ -32,26 +32,27 @@ def pygmentize(field, fctx, style, tmpl) if util.binary(text): return - # avoid UnicodeDecodeError in pygments - text = encoding.tolocal(text) + # Pygments is best used with Unicode strings: + # + text = text.decode(encoding.encoding, 'replace') # To get multi-line strings right, we can't format line-by-line try: - lexer = guess_lexer_for_filename(fctx.path(), text[:1024], - encoding=encoding.encoding) + lexer = guess_lexer_for_filename(fctx.path(), text[:1024]) except (ClassNotFound, ValueError): try: - lexer = guess_lexer(text[:1024], encoding=encoding.encoding) + lexer = guess_lexer(text[:1024]) except (ClassNotFound, ValueError): - lexer = TextLexer(encoding=encoding.encoding) + lexer = TextLexer() - formatter = HtmlFormatter(style=style, encoding=encoding.encoding) + formatter = HtmlFormatter(style=style) colorized = highlight(text, lexer, formatter) # strip wrapping div colorized = colorized[:colorized.find('\n')] colorized = colorized[colorized.find('
')+5:]
-    coloriter = iter(colorized.splitlines())
+    coloriter = (s.encode(encoding.encoding, 'replace')
+                 for s in colorized.splitlines())
 
     tmpl.filters['colorize'] = lambda x: coloriter.next()
 
diff --git a/tests/test-highlight b/tests/test-highlight
--- a/tests/test-highlight
+++ b/tests/test-highlight
@@ -121,3 +121,28 @@ rm out
 
 echo % errors encountered
 cat errors.log
+
+cd ..
+hg init eucjp
+cd eucjp
+
+printf '\265\376\n' >> eucjp.txt  # Japanese kanji "Kyo"
+
+hg ci -Ama
+
+hgserveget () {
+    "$TESTDIR/killdaemons.py"
+    echo % HGENCODING="$1" hg serve
+    HGENCODING="$1" hg serve -p $HGPORT -d -n test --pid-file=hg.pid -E errors.log
+    cat hg.pid >> $DAEMON_PIDS
+
+    echo % hgweb filerevision, html
+    "$TESTDIR/get-with-headers.py" localhost:$HGPORT "/file/tip/$2" \
+        | grep '
' | $TESTDIR/printrepr.py + echo % errors encountered + cat errors.log +} + +hgserveget euc-jp eucjp.txt +hgserveget utf-8 eucjp.txt +hgserveget us-ascii eucjp.txt diff --git a/tests/test-highlight.out b/tests/test-highlight.out --- a/tests/test-highlight.out +++ b/tests/test-highlight.out @@ -538,3 +538,16 @@ 200 Script output follows /* pygments_style = fruity */ % errors encountered +adding eucjp.txt +% HGENCODING=euc-jp hg serve +% hgweb filerevision, html +
1 \xb5\xfe
+% errors encountered +% HGENCODING=utf-8 hg serve +% hgweb filerevision, html +
1 \xef\xbf\xbd\xef\xbf\xbd
+% errors encountered +% HGENCODING=us-ascii hg serve +% hgweb filerevision, html +
1 ??
+% errors encountered