# HG changeset patch
# User Arseniy Alekseyev <aalekseyev@janestreet.com>
# Date 2023-03-06 11:27:57
# Node ID 95acba2c29f6e90a7c19da0585a7b52efb416082
# Parent  bcf54837241d2efa0d4e0b727d7fe9b6433e3448

encoding: avoid quadratic time complexity when json-encoding non-UTF8 strings

The code builds its result with "+=" on a bytes object inside a loop. Each such
append copies the accumulated result, which is linear-time, so the whole
encoding is quadratic-time. This patch uses a bytearray object instead, which
has an amortized constant-time append operation.

The encoding is still not particularly fast, but at least a 10MB file now
takes tens of seconds, rather than many hours, to encode.

diff --git a/mercurial/encoding.py b/mercurial/encoding.py
--- a/mercurial/encoding.py
+++ b/mercurial/encoding.py
@@ -657,7 +657,7 @@ def toutf8b(s):
             pass
 
     s = pycompat.bytestr(s)
-    r = b""
+    r = bytearray()
     pos = 0
     l = len(s)
     while pos < l:
@@ -673,7 +673,7 @@ def toutf8b(s):
             c = unichr(0xDC00 + ord(s[pos])).encode('utf-8', _utf8strict)
             pos += 1
         r += c
-    return r
+    return bytes(r)
 
 
 def fromutf8b(s):
@@ -712,7 +712,7 @@ def fromutf8b(s):
     # helper again to walk the string without "decoding" it.
 
     s = pycompat.bytestr(s)
-    r = b""
+    r = bytearray()
     pos = 0
     l = len(s)
     while pos < l:
@@ -722,4 +722,4 @@ def fromutf8b(s):
         if b"\xed\xb0\x80" <= c <= b"\xed\xb3\xbf":
             c = pycompat.bytechr(ord(c.decode("utf-8", _utf8strict)) & 0xFF)
         r += c
-    return r
+    return bytes(r)