# HG changeset patch
# User Christian Ebert
# Date 2008-10-17 10:12:33
# Node ID db7557359636017e4ebf55c3a8a8d3558a3a9eb3
# Parent 50f4e866d693a18bd0a0fe0b3c99ad6be6526316
highlight: convert text to local before passing to pygmentize (issue1341)

Example case:
Display file written in iso-8859-1 with current HGENCODING utf-8.
At the moment only an Error page appears because pygmentize chokes
on the replacement chars.

Alternatives:
1) Turn off highlighting and avoid UnicodeDecodeError for files that
   are not in HGENCODING.
2) [this patch] use util.tolocal to display these files.

Alternative 2) seems ok, as this only concerns display and readability.

See also: fe38b0a3a928, apparently put aside during refactor of highlight.

Add test for UnicodeDecodeError with iso-8859-1 file contents.

diff --git a/hgext/highlight/highlight.py b/hgext/highlight/highlight.py
--- a/hgext/highlight/highlight.py
+++ b/hgext/highlight/highlight.py
@@ -29,6 +29,9 @@ def pygmentize(field, fctx, style, tmpl)
     if util.binary(text):
         return
 
+    # avoid UnicodeDecodeError in pygments
+    text = util.tolocal(text)
+
     # To get multi-line strings right, we can't format line-by-line
     try:
         lexer = guess_lexer_for_filename(fctx.path(), text[:1024],
diff --git a/tests/test-highlight b/tests/test-highlight
--- a/tests/test-highlight
+++ b/tests/test-highlight
@@ -12,6 +12,10 @@ EOF
 hg init test
 cd test
 cp $TESTDIR/get-with-headers.py ./
+
+# check for UnicodeDecodeError with iso-8859-1 file contents
+python -c 'fp = open("isolatin", "w"); fp.write("h\xFCbsch\n"); fp.close();'
+
 hg ci -Ama
 
 echo % hg serve
@@ -22,6 +26,10 @@ echo % hgweb filerevision, html
 ("$TESTDIR/get-with-headers.py" localhost:$HGPORT '/file/tip/get-with-headers.py') \
     | sed "s/[0-9]* years ago/long ago/g" | sed "s/class=\"k\"/class=\"kn\"/g"
 
+echo % hgweb filerevision, html
+("$TESTDIR/get-with-headers.py" localhost:$HGPORT '/file/tip/isolatin') \
+    | sed "s/[0-9]* years ago/long ago/g" | sed "s/class=\"k\"/class=\"kn\"/g"
+
 echo % hgweb fileannotate, html
 ("$TESTDIR/get-with-headers.py" localhost:$HGPORT '/annotate/tip/get-with-headers.py') \
     | sed "s/[0-9]* years ago/long ago/g" | sed "s/class=\"k\"/class=\"kn\"/g"
diff --git a/tests/test-highlight.out b/tests/test-highlight.out
--- a/tests/test-highlight.out
+++ b/tests/test-highlight.out
@@ -1,4 +1,5 @@
 adding get-with-headers.py
+adding isolatin
 % hg serve
 % hgweb filerevision, html
 200 Script output follows
@@ -20,11 +21,11 @@ 200 Script output follows
 shortlog
 graph
 tags
-changeset
-files
-revisions
-annotate
-raw
+changeset
+files
+revisions
+annotate
+raw

get-with-headers.py

@@ -32,7 +33,7 @@ 200 Script output follows - + @@ -63,6 +64,69 @@ 200 Script output follows +% hgweb filerevision, html +200 Script output follows + + + + + + + + + +test:isolatin + + + +
+changelog +shortlog +graph +tags +changeset +files +revisions +annotate +raw +
+ +

isolatin

+ +
changeset 0:53f07353b803
7697c52ca9b0
+ + + + + + + + + + + + + + + + + + +
changeset 0:7697c52ca9b0
author:test
date:Thu Jan 01 00:00:00 1970 +0000 (long ago)
permissions:-rw-r--r--
description:a
+ +
+
1h?bsch
+
+ + + + + + + % hgweb fileannotate, html 200 Script output follows @@ -83,11 +147,11 @@ 200 Script output follows shortlog graph tags -changeset -files -file -revisions -raw +changeset +files +file +revisions +raw

Annotate get-with-headers.py

@@ -95,7 +159,7 @@ 200 Script output follows - + @@ -118,7 +182,7 @@ 200 Script output follows
changeset 0:53f07353b803
7697c52ca9b0
- +
test@0 1
#!/usr/bin/env python
test@0 2
test@0 3
__doc__ = """This does HTTP get requests given a host:port and path and returns
test@0 4
a subset of the headers plus the body of the result."""
test@0 5
test@0 6
import httplib, sys
test@0 7
test@0 8
try:
test@0 9
    import msvcrt, os
test@0 10
    msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
test@0 11
    msvcrt.setmode(sys.stderr.fileno(), os.O_BINARY)
test@0 12
except ImportError:
test@0 13
    pass
test@0 14
test@0 15
headers = [h.lower() for h in sys.argv[3:]]
test@0 16
conn = httplib.HTTPConnection(sys.argv[1])
test@0 17
conn.request("GET", sys.argv[2])
test@0 18
response = conn.getresponse()
test@0 19
print response.status, response.reason
test@0 20
for h in headers:
test@0 21
    if response.getheader(h, None) is not None:
test@0 22
        print "%s: %s" % (h, response.getheader(h))
test@0 23
print
test@0 24
sys.stdout.write(response.read())
test@0 25
test@0 26
if 200 <= response.status <= 299:
test@0 27
    sys.exit(0)
test@0 28
sys.exit(1)
test@0 1
#!/usr/bin/env python
test@0 2
test@0 3
__doc__ = """This does HTTP get requests given a host:port and path and returns
test@0 4
a subset of the headers plus the body of the result."""
test@0 5
test@0 6
import httplib, sys
test@0 7
test@0 8
try:
test@0 9
    import msvcrt, os
test@0 10
    msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
test@0 11
    msvcrt.setmode(sys.stderr.fileno(), os.O_BINARY)
test@0 12
except ImportError:
test@0 13
    pass
test@0 14
test@0 15
headers = [h.lower() for h in sys.argv[3:]]
test@0 16
conn = httplib.HTTPConnection(sys.argv[1])
test@0 17
conn.request("GET", sys.argv[2])
test@0 18
response = conn.getresponse()
test@0 19
print response.status, response.reason
test@0 20
for h in headers:
test@0 21
    if response.getheader(h, None) is not None:
test@0 22
        print "%s: %s" % (h, response.getheader(h))
test@0 23
print
test@0 24
sys.stdout.write(response.read())
test@0 25
test@0 26
if 200 <= response.status <= 299:
test@0 27
    sys.exit(0)
test@0 28
sys.exit(1)