##// END OF EJS Templates
dirs: resolve fuzzer OOM situation by disallowing deep directory hierarchies...
dirs: resolve fuzzer OOM situation by disallowing deep directory hierarchies It seems like 2048 directories ought to be enough for any reasonable use of Mercurial? A previous version of this patch scanned for slashes before any allocations occurred. That approach is slower than this in the happy path, but much faster than this in the case that too many slashes are encountered. We may want to revisit it in the future using memchr() so it'll be well-optimized by the libc we're using. .. bc: Mercurial will now defend against OOMs by refusing to operate on paths with 2048 or more components. This means that _extremely_ deep path hierarchies will be rejected, but we anticipate nobody is using hierarchies this deep. Differential Revision: https://phab.mercurial-scm.org/D7411

File last commit:

r43347:687b865b default
r44057:0796e266 default
Show More
bdiff.py
99 lines | 2.4 KiB | text/x-python | PythonLexer
Martin Geisler
pure Python implementation of bdiff.c
r7703 # bdiff.py - Python implementation of bdiff.c
#
# Copyright 2009 Matt Mackall <mpm@selenic.com> and others
#
Martin Geisler
updated license to be explicit about GPL version 2
r8225 # This software may be used and distributed according to the terms of the
Matt Mackall
Update license to GPLv2+
r10263 # GNU General Public License version 2 or any later version.
Martin Geisler
pure Python implementation of bdiff.c
r7703
Gregory Szorc
bdiff: use absolute_import
r27335 from __future__ import absolute_import
import difflib
import re
import struct
Matt Mackall
pure/bdiff: fix circular import
r7944
Augie Fackler
formatting: blacken the codebase...
r43346
Matt Mackall
pure/bdiff: fix circular import
r7944 def splitnewlines(text):
'''like str.splitlines, but only split on newlines.'''
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 lines = [l + b'\n' for l in text.split(b'\n')]
Matt Mackall
pure/bdiff: fix circular import
r7944 if lines:
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 if lines[-1] == b'\n':
Matt Mackall
pure/bdiff: fix circular import
r7944 lines.pop()
else:
lines[-1] = lines[-1][:-1]
return lines
Martin Geisler
pure Python implementation of bdiff.c
r7703
Augie Fackler
formatting: blacken the codebase...
r43346
Martin Geisler
pure Python implementation of bdiff.c
r7703 def _normalizeblocks(a, b, blocks):
prev = None
Dan Villiom Podlaski Christiansen
pure bdiff: don't use a generator...
r14066 r = []
Martin Geisler
pure Python implementation of bdiff.c
r7703 for curr in blocks:
if prev is None:
prev = curr
continue
shift = 0
a1, b1, l1 = prev
a1end = a1 + l1
b1end = b1 + l1
a2, b2, l2 = curr
a2end = a2 + l2
b2end = b2 + l2
if a1end == a2:
Augie Fackler
formatting: blacken the codebase...
r43346 while (
a1end + shift < a2end and a[a1end + shift] == b[b1end + shift]
):
Martin Geisler
pure Python implementation of bdiff.c
r7703 shift += 1
elif b1end == b2:
Augie Fackler
formatting: blacken the codebase...
r43346 while (
b1end + shift < b2end and a[a1end + shift] == b[b1end + shift]
):
Martin Geisler
pure Python implementation of bdiff.c
r7703 shift += 1
Dan Villiom Podlaski Christiansen
pure bdiff: don't use a generator...
r14066 r.append((a1, b1, l1 + shift))
Matt Mackall
many, many trivial check-code fixups
r10282 prev = a2 + shift, b2 + shift, l2 - shift
Dan Villiom Podlaski Christiansen
pure bdiff: don't use a generator...
r14066 r.append(prev)
return r
Martin Geisler
pure Python implementation of bdiff.c
r7703
Augie Fackler
formatting: blacken the codebase...
r43346
Martin Geisler
pure Python implementation of bdiff.c
r7703 def bdiff(a, b):
Yuya Nishihara
py3: use bytes() to cast to immutable bytes in pure.bdiff.bdiff()
r31641 a = bytes(a).splitlines(True)
b = bytes(b).splitlines(True)
Martin Geisler
pure Python implementation of bdiff.c
r7703
if not a:
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 s = b"".join(b)
return s and (struct.pack(b">lll", 0, 0, len(s)) + s)
Martin Geisler
pure Python implementation of bdiff.c
r7703
bin = []
p = [0]
Alex Gaynor
style: never put multiple statements on one line...
r34436 for i in a:
p.append(p[-1] + len(i))
Martin Geisler
pure Python implementation of bdiff.c
r7703
d = difflib.SequenceMatcher(None, a, b).get_matching_blocks()
d = _normalizeblocks(a, b, d)
la = 0
lb = 0
for am, bm, size in d:
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 s = b"".join(b[lb:bm])
Martin Geisler
pure Python implementation of bdiff.c
r7703 if am > la or s:
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 bin.append(struct.pack(b">lll", p[la], p[am], len(s)) + s)
Martin Geisler
pure Python implementation of bdiff.c
r7703 la = am + size
lb = bm + size
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 return b"".join(bin)
Martin Geisler
pure Python implementation of bdiff.c
r7703
Augie Fackler
formatting: blacken the codebase...
r43346
Martin Geisler
pure Python implementation of bdiff.c
r7703 def blocks(a, b):
Matt Mackall
pure/bdiff: fix circular import
r7944 an = splitnewlines(a)
bn = splitnewlines(b)
Martin Geisler
pure Python implementation of bdiff.c
r7703 d = difflib.SequenceMatcher(None, an, bn).get_matching_blocks()
d = _normalizeblocks(an, bn, d)
return [(i, i + n, j, j + n) for (i, j, n) in d]
Augie Fackler
formatting: blacken the codebase...
r43346
Patrick Mezard
mdiff: replace wscleanup() regexps with C loops...
r15530 def fixws(text, allws):
if allws:
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 text = re.sub(b'[ \t\r]+', b'', text)
Patrick Mezard
mdiff: replace wscleanup() regexps with C loops...
r15530 else:
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
r43347 text = re.sub(b'[ \t\r]+', b' ', text)
text = text.replace(b' \n', b'\n')
Patrick Mezard
mdiff: replace wscleanup() regexps with C loops...
r15530 return text