diff --git a/contrib/fuzz/Makefile b/contrib/fuzz/Makefile
--- a/contrib/fuzz/Makefile
+++ b/contrib/fuzz/Makefile
@@ -70,12 +70,62 @@ xdiff_fuzzer: xdiff.cc fuzz-xdiffi.o fuz
 	  fuzz-xdiffi.o fuzz-xprepare.o fuzz-xutils.o fuzzutil-oss-fuzz.o \
 	  -lFuzzingEngine -o $$OUT/xdiff_fuzzer
 
+# TODO use the $OUT env var instead of hardcoding /out
+/out/sanpy/bin/python:
+	cd /Python-2.7.15/ && ./configure --without-pymalloc --prefix=$$OUT/sanpy CFLAGS='-O1 -fno-omit-frame-pointer -g -fwrapv -fstack-protector-strong' LDFLAGS=-lasan && ASAN_OPTIONS=detect_leaks=0 make && make install
+
+sanpy: /out/sanpy/bin/python
+
+manifest.o: sanpy ../../mercurial/cext/manifest.c
+	$(CC) $(CFLAGS) `$$OUT/sanpy/bin/python-config --cflags` \
+	  -I../../mercurial \
+	  -c -o manifest.o ../../mercurial/cext/manifest.c
+
+charencode.o: sanpy ../../mercurial/cext/charencode.c
+	$(CC) $(CFLAGS) `$$OUT/sanpy/bin/python-config --cflags` \
+	  -I../../mercurial \
+	  -c -o charencode.o ../../mercurial/cext/charencode.c
+
+parsers.o: sanpy ../../mercurial/cext/parsers.c
+	$(CC) $(CFLAGS) `$$OUT/sanpy/bin/python-config --cflags` \
+	  -I../../mercurial \
+	  -c -o parsers.o ../../mercurial/cext/parsers.c
+
+dirs.o: sanpy ../../mercurial/cext/dirs.c
+	$(CC) $(CFLAGS) `$$OUT/sanpy/bin/python-config --cflags` \
+	  -I../../mercurial \
+	  -c -o dirs.o ../../mercurial/cext/dirs.c
+
+pathencode.o: sanpy ../../mercurial/cext/pathencode.c
+	$(CC) $(CFLAGS) `$$OUT/sanpy/bin/python-config --cflags` \
+	  -I../../mercurial \
+	  -c -o pathencode.o ../../mercurial/cext/pathencode.c
+
+revlog.o: sanpy ../../mercurial/cext/revlog.c
+	$(CC) $(CFLAGS) `$$OUT/sanpy/bin/python-config --cflags` \
+	  -I../../mercurial \
+	  -c -o revlog.o ../../mercurial/cext/revlog.c
+
+manifest_fuzzer: sanpy manifest.cc manifest.o charencode.o parsers.o dirs.o pathencode.o revlog.o
+	$(CXX) $(CXXFLAGS) `$$OUT/sanpy/bin/python-config --cflags` \
+	  -Wno-register -Wno-macro-redefined \
+	  -I../../mercurial manifest.cc \
+	  manifest.o charencode.o parsers.o dirs.o pathencode.o revlog.o \
+	  -lFuzzingEngine `$$OUT/sanpy/bin/python-config --ldflags` \
+	  -o $$OUT/manifest_fuzzer
+
+manifest_corpus.zip:
+	python manifest_corpus.py $$OUT/manifest_fuzzer_seed_corpus.zip
+
+copy_options:
+	cp *.options $$OUT
+
 clean:
 	$(RM) *.o *_fuzzer \
 	  bdiff \
 	  mpatch \
 	  xdiff
 
-oss-fuzz: bdiff_fuzzer mpatch_fuzzer mpatch_corpus.zip xdiff_fuzzer
+oss-fuzz: bdiff_fuzzer mpatch_fuzzer mpatch_corpus.zip xdiff_fuzzer manifest_fuzzer manifest_corpus.zip copy_options
 
-.PHONY: all clean oss-fuzz
+.PHONY: all clean oss-fuzz sanpy copy_options
diff --git a/contrib/fuzz/manifest.cc b/contrib/fuzz/manifest.cc
new file mode 100644
--- /dev/null
+++ b/contrib/fuzz/manifest.cc
@@ -0,0 +1,109 @@
+/* NOTE(review): the header names on the #include lines were missing from
+   the copy under review; this list is rebuilt from the identifiers used
+   below -- confirm it against the original file. */
+#include <Python.h>
+#include <assert.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <string>
+
+extern "C" {
+
+/* TODO: use Python 3 for this fuzzing? */
+PyMODINIT_FUNC initparsers(void);
+
+/* Path handed to PYTHONPATH and Py_SetPythonHome(). It is a file-scope
+   buffer so the string stays valid after LLVMFuzzerInitialize() returns. */
+static char cpypath[8192] = "\0";
+
+/* One-time fuzzer setup: derive <directory of argv[0]>/sanpy/lib/python2.7
+   (or <cwd>/sanpy/lib/python2.7 when argv[0] has no slash) and export it
+   as PYTHONPATH and as the Python home. Aborts if the path cannot be
+   determined or does not fit in cpypath. */
+int LLVMFuzzerInitialize(int *argc, char ***argv)
+{
+	const std::string subdir = "/sanpy/lib/python2.7";
+	/* HACK ALERT: we need a full Python installation built without
+	   pymalloc and with ASAN, so we dump one in
+	   $OUT/sanpy/lib/python2.7. This helps us wire that up. */
+	std::string selfpath((*argv)[0]);
+	std::string pypath;
+	auto pos = selfpath.rfind("/");
+	if (pos == std::string::npos) {
+		char wd[8192];
+		/* getcwd() leaves wd unspecified on failure; do not read it. */
+		if (getcwd(wd, sizeof(wd)) == NULL) {
+			perror("getcwd");
+			abort();
+		}
+		pypath = std::string(wd) + subdir;
+	} else {
+		pypath = selfpath.substr(0, pos) + subdir;
+	}
+	/* Refuse to overflow cpypath; the copy below includes the NUL. */
+	if (pypath.size() >= sizeof(cpypath)) {
+		fprintf(stderr, "python path too long: %s\n", pypath.c_str());
+		abort();
+	}
+	memcpy(cpypath, pypath.c_str(), pypath.size() + 1);
+	setenv("PYTHONPATH", cpypath, 1);
+	Py_SetPythonHome(cpypath);
+	return 0;
+}
+
+/* Fuzz target: exposes Data to Python as `mdata`, then parses it with
+   parsers.lazymanifest and round-trips an insert and a delete. Python
+   errors are printed rather than dereferenced as NULL results. */
+int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size)
+{
+	Py_InitializeEx(0);
+	initparsers();
+	PyObject *mtext =
+	    PyBytes_FromStringAndSize((const char *)Data, (Py_ssize_t)Size);
+	PyObject *mainmod = PyImport_AddModule("__main__");
+	PyObject *locals = PyDict_New();
+	PyCodeObject *code = NULL;
+	PyObject *res = NULL;
+	if (mtext && mainmod && locals &&
+	    PyDict_SetItemString(locals, "mdata", mtext) == 0) {
+		code = (PyCodeObject *)Py_CompileString(R"py(
+from parsers import lazymanifest
+lm = lazymanifest(mdata)
+try:
+  # iterate the whole thing, which causes the code to fully parse
+  # every line in the manifest
+  list(lm.iterentries())
+  lm[b'xyzzy'] = (b'\0' * 20, 'x')
+  # do an insert, text should change
+  assert lm.text() != mdata, "insert should change text and didn't: %r %r" % (lm.text(), mdata)
+  del lm[b'xyzzy']
+  # should be back to the same
+  assert lm.text() == mdata, "delete should have restored text but didn't: %r %r" % (lm.text(), mdata)
+except Exception as e:
+  pass
+  # uncomment this print if you're editing this Python code
+  # to debug failures.
+  # print e
+)py",
+		                                        "fuzzer", Py_file_input);
+	}
+	if (code) {
+		/* mainmod was checked above, so its dict can be fetched here. */
+		res = PyEval_EvalCode(code, PyModule_GetDict(mainmod), locals);
+	}
+	if (!res && PyErr_Occurred()) {
+		/* Setup, compilation or evaluation failed: report it. */
+		PyErr_Print();
+	}
+	Py_XDECREF(res);
+	Py_XDECREF(code);
+	Py_XDECREF(locals);
+	Py_XDECREF(mtext);
+	Py_Finalize();
+	return 0; // Non-zero return values are reserved for future use.
+}
+}
diff --git a/contrib/fuzz/manifest_corpus.py b/contrib/fuzz/manifest_corpus.py
new file mode 100644
--- /dev/null
+++ b/contrib/fuzz/manifest_corpus.py
@@ -0,0 +1,33 @@
+from __future__ import absolute_import, print_function
+
+import argparse
+import zipfile
+
+ap = argparse.ArgumentParser()
+ap.add_argument("out", metavar="some.zip", type=str, nargs=1)
+args = ap.parse_args()
+
+# The NUL separators are spelled \x00, not \0: \0 is an octal escape that
+# would also consume following octal digits of the hash (e.g. "\017f5a..."
+# yields "\x0f" + "f5a...").
+with zipfile.ZipFile(args.out[0], "w", zipfile.ZIP_STORED) as zf:
+    zf.writestr("manifest_zero",
+'''PKG-INFO\x009b3ed8f2b81095a13064402e930565f083346e9a
+README\x0080b6e76643dcb44d4bc729e932fc464b3e36dbe3
+hg\x00b6444347c629cc058d478023905cfb83b7f5bb9d
+mercurial/__init__.py\x00b80de5d138758541c5f05265ad144ab9fa86d1db
+mercurial/byterange.py\x0017f5a9fbd99622f31a392c33ac1e903925dc80ed
+mercurial/fancyopts.py\x00b6f52e23e356748c5039313d8b639cda16bf67ba
+mercurial/hg.py\x0023cc12f225f1b42f32dc0d897a4f95a38ddc8f4a
+mercurial/mdiff.py\x00a05f65c44bfbeec6a42336cd2ff0b30217899ca3
+mercurial/revlog.py\x00217bc3fde6d82c0210cf56aeae11d05a03f35b2b
+mercurial/transaction.py\x009d180df101dc14ce3dd582fd998b36c98b3e39aa
+notes.txt\x00703afcec5edb749cf5cec67831f554d6da13f2fb
+setup.py\x00ccf3f6daf0f13101ca73631f7a1769e328b472c9
+tkmerge\x003c922edb43a9c143682f7bc7b00f98b3c756ebe7
+''')
+    zf.writestr("badmanifest_shorthashes",
+                "narf\x00aa\nnarf2\x00aaa\n")
+    zf.writestr("badmanifest_nonull",
+                "narf\x00cccccccccccccccccccccccccccccccccccccccc\n"
+                "narf2aaaaaaaaaaaaaaaaaaaa\n")
diff --git a/contrib/fuzz/manifest_fuzzer.options b/contrib/fuzz/manifest_fuzzer.options
new file mode 100644
--- /dev/null
+++ b/contrib/fuzz/manifest_fuzzer.options
@@ -0,0 +1,2 @@
+[libfuzzer]
+detect_leaks = 0