# HG changeset patch
# User Matt Mackall <mpm@selenic.com>
# Date 2014-12-17 19:25:24
# Node ID 7b8ff3fd11d39bd809fc65c628d65045502178cc
# Parent  6006cad5e7a98610be6ea182d3b3d30cd6f4370b

highlight: ignore Unicode's extra linebreaks (issue4291)

Unicode and Python's unicode.splitlines() treat several extra legacy
ASCII codepoints as linebreaks, even though the vast bulk of computing
and Python's own str.splitlines() do not. Rather than introduce line
numbering confusion, we filter them out when highlighting.

diff --git a/hgext/highlight/highlight.py b/hgext/highlight/highlight.py
--- a/hgext/highlight/highlight.py
+++ b/hgext/highlight/highlight.py
@@ -32,6 +32,11 @@ def pygmentize(field, fctx, style, tmpl)
     if util.binary(text):
         return
 
+    # unicode.splitlines() splits on these; str.splitlines() does not
+    for c in "\x0c\x1c\x1d\x1e":
+        if c in text:
+            text = text.replace(c, '')
+
     # Pygments is best used with Unicode strings:
     # <http://pygments.org/docs/unicode/>
     text = text.decode(encoding.encoding, 'replace')