# HG changeset patch
# User Augie Fackler
# Date 2018-09-06 06:36:25
# Node ID 8c692a6b5ad19f5d8f44ebd3a4f81d183cf438b2
# Parent  55db747a21ad7352bd97036e01bc45422c7f26a0
fuzz: new fuzzer for cext/manifest.c

This is a bit messy, because lazymanifest is tightly coupled to the cpython API
for performance reasons. As a result, we have to build a whole Python without
pymalloc (so ASAN can help us out) and link against that. Then we have to use
an embedded Python interpreter. We could manually drive the lazymanifest in C
from that point, but experimentally just using PyEval_EvalCode isn't really any
slower so we may as well do that and write the innermost guts of the fuzzer in
Python.

Leak detection is currently disabled for this fuzzer because there are a few
global-lifetime things in our extensions that we more or less intentionally
leak and I didn't want to take the detour to work around that for now.

This should not be pushed to our repo until
https://github.com/google/oss-fuzz/pull/1853 is merged, as this depends on
having the Python tarball around.

Differential Revision: https://phab.mercurial-scm.org/D4879

diff --git a/contrib/fuzz/Makefile b/contrib/fuzz/Makefile
--- a/contrib/fuzz/Makefile
+++ b/contrib/fuzz/Makefile
@@ -70,12 +70,62 @@ xdiff_fuzzer: xdiff.cc fuzz-xdiffi.o fuz
 	  fuzz-xdiffi.o fuzz-xprepare.o fuzz-xutils.o fuzzutil-oss-fuzz.o \
 	  -lFuzzingEngine -o $$OUT/xdiff_fuzzer
 
+# TODO use the $OUT env var instead of hardcoding /out
+/out/sanpy/bin/python:
+	cd /Python-2.7.15/ && ./configure --without-pymalloc --prefix=$$OUT/sanpy CFLAGS='-O1 -fno-omit-frame-pointer -g -fwrapv -fstack-protector-strong' LDFLAGS=-lasan && ASAN_OPTIONS=detect_leaks=0 make && make install
+
+sanpy: /out/sanpy/bin/python
+
+manifest.o: sanpy ../../mercurial/cext/manifest.c
+	$(CC) $(CFLAGS) `$$OUT/sanpy/bin/python-config --cflags` \
+	  -I../../mercurial \
+	  -c -o manifest.o ../../mercurial/cext/manifest.c
+
+charencode.o: sanpy ../../mercurial/cext/charencode.c
+	$(CC) $(CFLAGS) `$$OUT/sanpy/bin/python-config --cflags` \
+	  -I../../mercurial \
+	  -c -o charencode.o ../../mercurial/cext/charencode.c
+
+parsers.o: sanpy ../../mercurial/cext/parsers.c
+	$(CC) $(CFLAGS) `$$OUT/sanpy/bin/python-config --cflags` \
+	  -I../../mercurial \
+	  -c -o parsers.o ../../mercurial/cext/parsers.c
+
+dirs.o: sanpy ../../mercurial/cext/dirs.c
+	$(CC) $(CFLAGS) `$$OUT/sanpy/bin/python-config --cflags` \
+	  -I../../mercurial \
+	  -c -o dirs.o ../../mercurial/cext/dirs.c
+
+pathencode.o: sanpy ../../mercurial/cext/pathencode.c
+	$(CC) $(CFLAGS) `$$OUT/sanpy/bin/python-config --cflags` \
+	  -I../../mercurial \
+	  -c -o pathencode.o ../../mercurial/cext/pathencode.c
+
+revlog.o: sanpy ../../mercurial/cext/revlog.c
+	$(CC) $(CFLAGS) `$$OUT/sanpy/bin/python-config --cflags` \
+	  -I../../mercurial \
+	  -c -o revlog.o ../../mercurial/cext/revlog.c
+
+manifest_fuzzer: sanpy manifest.cc manifest.o charencode.o parsers.o dirs.o pathencode.o revlog.o
+	$(CXX) $(CXXFLAGS) `$$OUT/sanpy/bin/python-config --cflags` \
+	  -Wno-register -Wno-macro-redefined \
+	  -I../../mercurial manifest.cc \
+	  manifest.o charencode.o parsers.o dirs.o pathencode.o revlog.o \
+	  -lFuzzingEngine `$$OUT/sanpy/bin/python-config --ldflags` \
+	  -o $$OUT/manifest_fuzzer
+
+manifest_corpus.zip:
+	python manifest_corpus.py $$OUT/manifest_fuzzer_seed_corpus.zip
+
+copy_options:
+	cp *.options $$OUT
+
 clean:
 	$(RM) *.o *_fuzzer \
 	  bdiff \
 	  mpatch \
 	  xdiff
 
-oss-fuzz: bdiff_fuzzer mpatch_fuzzer mpatch_corpus.zip xdiff_fuzzer
+oss-fuzz: bdiff_fuzzer mpatch_fuzzer mpatch_corpus.zip xdiff_fuzzer manifest_fuzzer manifest_corpus.zip copy_options
 
-.PHONY: all clean oss-fuzz
+.PHONY: all clean oss-fuzz sanpy copy_options
diff --git a/contrib/fuzz/manifest.cc b/contrib/fuzz/manifest.cc
new file mode 100644
--- /dev/null
+++ b/contrib/fuzz/manifest.cc
@@ -0,0 +1,108 @@
+#include <Python.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <string>
+
+extern "C" {
+
+/* TODO: use Python 3 for this fuzzing? */
+PyMODINIT_FUNC initparsers(void);
+
+/* Static storage for the path handed to Py_SetPythonHome(), which
+   needs a string that stays valid for the life of the process. */
+static char cpypath[8192] = "\0";
+
+/* One-time libFuzzer setup: point the embedded interpreter at the
+   Python tree installed next to the fuzzer binary. Aborts if that
+   path cannot be determined or does not fit in cpypath. */
+extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv)
+{
+	const std::string subdir = "/sanpy/lib/python2.7";
+	/* HACK ALERT: we need a full Python installation built without
+	   pymalloc and with ASAN, so we dump one in
+	   $OUT/sanpy/lib/python2.7. This helps us wire that up. */
+	std::string selfpath((*argv)[0]);
+	std::string pypath;
+	auto pos = selfpath.rfind("/");
+	if (pos == std::string::npos) {
+		char wd[8192];
+		/* getcwd() gives no usable result on failure, so bail
+		   out rather than build a path from garbage. */
+		if (getcwd(wd, sizeof(wd)) == nullptr) {
+			perror("manifest_fuzzer: getcwd");
+			abort();
+		}
+		pypath = std::string(wd) + subdir;
+	} else {
+		pypath = selfpath.substr(0, pos) + subdir;
+	}
+	/* Refuse paths that do not fit (including the trailing NUL)
+	   instead of writing past the end of cpypath. */
+	if (pypath.size() >= sizeof(cpypath)) {
+		fprintf(stderr, "manifest_fuzzer: python path too long\n");
+		abort();
+	}
+	memcpy(cpypath, pypath.c_str(), pypath.size() + 1);
+	setenv("PYTHONPATH", cpypath, 1);
+	Py_SetPythonHome(cpypath);
+	return 0;
+}
+
+/* Fuzz entry point: feeds Data to lazymanifest through a small Python
+   script evaluated in a freshly initialized embedded interpreter. */
+int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size)
+{
+	Py_InitializeEx(0);
+	initparsers();
+	PyObject *mtext =
+	    PyBytes_FromStringAndSize((const char *)Data, (Py_ssize_t)Size);
+	PyObject *mainmod = PyImport_AddModule("__main__");
+	PyObject *globals = PyModule_GetDict(mainmod);
+	PyObject *locals = PyDict_New();
+	PyDict_SetItemString(locals, "mdata", mtext);
+	PyCodeObject *code =
+	    (PyCodeObject *)Py_CompileString(R"py(
+from parsers import lazymanifest
+lm = lazymanifest(mdata)
+try:
+  # iterate the whole thing, which causes the code to fully parse
+  # every line in the manifest
+  list(lm.iterentries())
+  lm[b'xyzzy'] = (b'\0' * 20, 'x')
+  # do an insert, text should change
+  assert lm.text() != mdata, "insert should change text and didn't: %r %r" % (lm.text(), mdata)
+  del lm[b'xyzzy']
+  # should be back to the same
+  assert lm.text() == mdata, "delete should have restored text but didn't: %r %r" % (lm.text(), mdata)
+except Exception as e:
+  pass
+  # uncomment this print if you're editing this Python code
+  # to debug failures.
+  # print e
+)py",
+	                                     "fuzzer", Py_file_input);
+	if (!code) {
+		/* The script is a constant, so a compile failure is a bug
+		   in this file; report it instead of crashing in Py_DECREF. */
+		PyErr_Print();
+		abort();
+	}
+	/* PyEval_EvalCode() returns a new reference, or NULL when the
+	   script raised (which malformed input may well cause); drop
+	   either outcome so nothing leaks or lingers between inputs. */
+	PyObject *res = PyEval_EvalCode(code, globals, locals);
+	if (!res) {
+		PyErr_Clear();
+	}
+	Py_XDECREF(res);
+	Py_DECREF(code);
+	Py_DECREF(locals);
+	Py_DECREF(mtext);
+	Py_Finalize();
+	return 0; // Non-zero return values are reserved for future use.
+}
+}
diff --git a/contrib/fuzz/manifest_corpus.py b/contrib/fuzz/manifest_corpus.py
new file mode 100644
--- /dev/null
+++ b/contrib/fuzz/manifest_corpus.py
@@ -0,0 +1,33 @@
+from __future__ import absolute_import, print_function
+
+import argparse
+import zipfile
+
+ap = argparse.ArgumentParser()
+ap.add_argument("out", metavar="some.zip", type=str, nargs=1)
+args = ap.parse_args()
+
+# The NUL between path and hash is spelled \x00 rather than \0: Python reads
+# \0 plus following octal digits as one escape (e.g. "\017" is chr(15)), which
+# would silently corrupt entries whose hash starts with the digits 0-7.
+with zipfile.ZipFile(args.out[0], "w", zipfile.ZIP_STORED) as zf:
+    zf.writestr("manifest_zero",
+'''PKG-INFO\x009b3ed8f2b81095a13064402e930565f083346e9a
+README\x0080b6e76643dcb44d4bc729e932fc464b3e36dbe3
+hg\x00b6444347c629cc058d478023905cfb83b7f5bb9d
+mercurial/__init__.py\x00b80de5d138758541c5f05265ad144ab9fa86d1db
+mercurial/byterange.py\x0017f5a9fbd99622f31a392c33ac1e903925dc80ed
+mercurial/fancyopts.py\x00b6f52e23e356748c5039313d8b639cda16bf67ba
+mercurial/hg.py\x0023cc12f225f1b42f32dc0d897a4f95a38ddc8f4a
+mercurial/mdiff.py\x00a05f65c44bfbeec6a42336cd2ff0b30217899ca3
+mercurial/revlog.py\x00217bc3fde6d82c0210cf56aeae11d05a03f35b2b
+mercurial/transaction.py\x009d180df101dc14ce3dd582fd998b36c98b3e39aa
+notes.txt\x00703afcec5edb749cf5cec67831f554d6da13f2fb
+setup.py\x00ccf3f6daf0f13101ca73631f7a1769e328b472c9
+tkmerge\x003c922edb43a9c143682f7bc7b00f98b3c756ebe7
+''')
+    zf.writestr("badmanifest_shorthashes",
+                "narf\x00aa\nnarf2\x00aaa\n")
+    zf.writestr("badmanifest_nonull",
+                "narf\x00cccccccccccccccccccccccccccccccccccccccc\n"
+                "narf2aaaaaaaaaaaaaaaaaaaa\n")
diff --git a/contrib/fuzz/manifest_fuzzer.options b/contrib/fuzz/manifest_fuzzer.options
new file mode 100644
--- /dev/null
+++ b/contrib/fuzz/manifest_fuzzer.options
@@ -0,0 +1,2 @@
+[libfuzzer]
+detect_leaks = 0