##// END OF EJS Templates
diff: do not concatenate immutable bytes while building a/b bodies (issue6445)...
diff: do not concatenate immutable bytes while building a/b bodies (issue6445) Use bytearray instead. I don't know what's changed since Python 2, but bytes concatenation is 100x slow on Python 3. % python2.7 -m timeit -s "s = b''" "for i in range(10000): s += b'line'" 1000 loops, best of 3: 321 usec per loop % python3.9 -m timeit -s "s = b''" "for i in range(10000): s += b'line'" 5 loops, best of 5: 39.2 msec per loop Benchmark using tailwind.css (measuring the fast path, a is empty): % HGRCPATH=/dev/null python2.7 ./hg log -R /tmp/issue6445 -p --time \ --color=always --config diff.word-diff=true >/dev/null (prev) time: real 1.580 secs (user 1.560+0.000 sys 0.020+0.000) (this) time: real 1.610 secs (user 1.570+0.000 sys 0.030+0.000) % HGRCPATH=/dev/null python3.9 ./hg log -R /tmp/issue6445 -p --time \ --color=always --config diff.word-diff=true >/dev/null (prev) time: real 114.500 secs (user 114.460+0.000 sys 0.030+0.000) (this) time: real 2.180 secs (user 2.140+0.000 sys 0.040+0.000) Benchmark using random tabular text data (not the fast path): % dd if=/dev/urandom bs=1k count=1000 | hexdump -v -e '16/1 "%3u," "\n"' > ttf % hg ci -ma % dd if=/dev/urandom bs=1k count=1000 | hexdump -v -e '16/1 "%3u," "\n"' > ttf % hg ci -mb % HGRCPATH=/dev/null python2.7 ./hg log -R /tmp/issue6445 -p --time \ --color=always --config diff.word-diff=true >/dev/null (prev) time: real 3.240 secs (user 3.040+0.000 sys 0.200+0.000 (this) time: real 3.230 secs (user 3.070+0.000 sys 0.160+0.000) % HGRCPATH=/dev/null python3.9 ./hg log -R /tmp/issue6445 -p --time \ --color=always --config diff.word-diff=true >/dev/null (prev) time: real 44.130 secs (user 43.850+0.000 sys 0.270+0.000) (this) time: real 4.170 secs (user 3.850+0.000 sys 0.310+0.000)

File last commit:

r44082:4cd91104 stable
r46624:210f9b8d stable
Show More
base85.py
88 lines | 2.0 KiB | text/x-python | PythonLexer
# base85.py: pure python base85 codec
#
# Copyright (C) 2009 Brendan Cully <brendan@kublai.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
from __future__ import absolute_import
import struct
from .. import pycompat
_b85chars = pycompat.bytestr(
b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdef"
b"ghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~"
)
_b85chars2 = [(a + b) for a in _b85chars for b in _b85chars]
_b85dec = {}
def _mkb85dec():
for i, c in enumerate(_b85chars):
_b85dec[c] = i
def b85encode(text, pad=False):
"""encode text in base85 format"""
l = len(text)
r = l % 4
if r:
text += b'\0' * (4 - r)
longs = len(text) >> 2
words = struct.unpack(b'>%dL' % longs, text)
out = b''.join(
_b85chars[(word // 52200625) % 85]
+ _b85chars2[(word // 7225) % 7225]
+ _b85chars2[word % 7225]
for word in words
)
if pad:
return out
# Trim padding
olen = l % 4
if olen:
olen += 1
olen += l // 4 * 5
return out[:olen]
def b85decode(text):
"""decode base85-encoded text"""
if not _b85dec:
_mkb85dec()
l = len(text)
out = []
for i in range(0, len(text), 5):
chunk = text[i : i + 5]
chunk = pycompat.bytestr(chunk)
acc = 0
for j, c in enumerate(chunk):
try:
acc = acc * 85 + _b85dec[c]
except KeyError:
raise ValueError(
'bad base85 character at position %d' % (i + j)
)
if acc > 4294967295:
raise ValueError('Base85 overflow in hunk starting at byte %d' % i)
out.append(acc)
# Pad final chunk if necessary
cl = l % 5
if cl:
acc *= 85 ** (5 - cl)
if cl > 1:
acc += 0xFFFFFF >> (cl - 2) * 8
out[-1] = acc
out = struct.pack(b'>%dL' % (len(out)), *out)
if cl:
out = out[: -(5 - cl)]
return out