Show More
@@ -0,0 +1,368 b'' | |||
|
1 | //===- FuzzedDataProvider.h - Utility header for fuzz targets ---*- C++ -* ===// | |
|
2 | // | |
|
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |
|
4 | // See https://llvm.org/LICENSE.txt for license information. | |
|
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |
|
6 | // | |
|
7 | //===----------------------------------------------------------------------===// | |
|
8 | // A single header library providing an utility class to break up an array of | |
|
9 | // bytes. Whenever run on the same input, provides the same output, as long as | |
|
10 | // its methods are called in the same order, with the same arguments. | |
|
11 | //===----------------------------------------------------------------------===// | |
|
12 | ||
|
13 | #ifndef LLVM_FUZZER_FUZZED_DATA_PROVIDER_H_ | |
|
14 | #define LLVM_FUZZER_FUZZED_DATA_PROVIDER_H_ | |
|
15 | ||
|
16 | #include <algorithm> | |
|
17 | #include <climits> | |
|
18 | #include <cstddef> | |
|
19 | #include <cstdint> | |
|
20 | #include <cstring> | |
|
21 | #include <initializer_list> | |
|
22 | #include <string> | |
|
23 | #include <type_traits> | |
|
24 | #include <utility> | |
|
25 | #include <vector> | |
|
26 | ||
|
27 | // In addition to the comments below, the API is also briefly documented at | |
|
28 | // https://github.com/google/fuzzing/blob/master/docs/split-inputs.md#fuzzed-data-provider | |
|
29 | class FuzzedDataProvider | |
|
30 | { | |
|
31 | public: | |
|
32 | // |data| is an array of length |size| that the FuzzedDataProvider wraps | |
|
33 | // to provide more granular access. |data| must outlive the | |
|
34 | // FuzzedDataProvider. | |
|
35 | FuzzedDataProvider(const uint8_t *data, size_t size) | |
|
36 | : data_ptr_(data), remaining_bytes_(size) | |
|
37 | { | |
|
38 | } | |
|
39 | ~FuzzedDataProvider() = default; | |
|
40 | ||
|
41 | // Returns a std::vector containing |num_bytes| of input data. If fewer | |
|
42 | // than |num_bytes| of data remain, returns a shorter std::vector | |
|
43 | // containing all of the data that's left. Can be used with any byte | |
|
44 | // sized type, such as char, unsigned char, uint8_t, etc. | |
|
45 | template <typename T> std::vector<T> ConsumeBytes(size_t num_bytes) | |
|
46 | { | |
|
47 | num_bytes = std::min(num_bytes, remaining_bytes_); | |
|
48 | return ConsumeBytes<T>(num_bytes, num_bytes); | |
|
49 | } | |
|
50 | ||
|
51 | // Similar to |ConsumeBytes|, but also appends the terminator value at | |
|
52 | // the end of the resulting vector. Useful, when a mutable | |
|
53 | // null-terminated C-string is needed, for example. But that is a rare | |
|
54 | // case. Better avoid it, if possible, and prefer using |ConsumeBytes| | |
|
55 | // or |ConsumeBytesAsString| methods. | |
|
56 | template <typename T> | |
|
57 | std::vector<T> ConsumeBytesWithTerminator(size_t num_bytes, | |
|
58 | T terminator = 0) | |
|
59 | { | |
|
60 | num_bytes = std::min(num_bytes, remaining_bytes_); | |
|
61 | std::vector<T> result = | |
|
62 | ConsumeBytes<T>(num_bytes + 1, num_bytes); | |
|
63 | result.back() = terminator; | |
|
64 | return result; | |
|
65 | } | |
|
66 | ||
|
67 | // Returns a std::string containing |num_bytes| of input data. Using | |
|
68 | // this and | |
|
69 | // |.c_str()| on the resulting string is the best way to get an | |
|
70 | // immutable null-terminated C string. If fewer than |num_bytes| of data | |
|
71 | // remain, returns a shorter std::string containing all of the data | |
|
72 | // that's left. | |
|
73 | std::string ConsumeBytesAsString(size_t num_bytes) | |
|
74 | { | |
|
75 | static_assert(sizeof(std::string::value_type) == | |
|
76 | sizeof(uint8_t), | |
|
77 | "ConsumeBytesAsString cannot convert the data to " | |
|
78 | "a string."); | |
|
79 | ||
|
80 | num_bytes = std::min(num_bytes, remaining_bytes_); | |
|
81 | std::string result( | |
|
82 | reinterpret_cast<const std::string::value_type *>( | |
|
83 | data_ptr_), | |
|
84 | num_bytes); | |
|
85 | Advance(num_bytes); | |
|
86 | return result; | |
|
87 | } | |
|
88 | ||
|
89 | // Returns a number in the range [min, max] by consuming bytes from the | |
|
90 | // input data. The value might not be uniformly distributed in the given | |
|
91 | // range. If there's no input data left, always returns |min|. |min| | |
|
92 | // must be less than or equal to |max|. | |
|
93 | template <typename T> T ConsumeIntegralInRange(T min, T max) | |
|
94 | { | |
|
95 | static_assert(std::is_integral<T>::value, | |
|
96 | "An integral type is required."); | |
|
97 | static_assert(sizeof(T) <= sizeof(uint64_t), | |
|
98 | "Unsupported integral type."); | |
|
99 | ||
|
100 | if (min > max) | |
|
101 | abort(); | |
|
102 | ||
|
103 | // Use the biggest type possible to hold the range and the | |
|
104 | // result. | |
|
105 | uint64_t range = static_cast<uint64_t>(max) - min; | |
|
106 | uint64_t result = 0; | |
|
107 | size_t offset = 0; | |
|
108 | ||
|
109 | while (offset < sizeof(T) * CHAR_BIT && (range >> offset) > 0 && | |
|
110 | remaining_bytes_ != 0) { | |
|
111 | // Pull bytes off the end of the seed data. | |
|
112 | // Experimentally, this seems to allow the fuzzer to | |
|
113 | // more easily explore the input space. This makes | |
|
114 | // sense, since it works by modifying inputs that caused | |
|
115 | // new code to run, and this data is often used to | |
|
116 | // encode length of data read by |ConsumeBytes|. | |
|
117 | // Separating out read lengths makes it easier modify | |
|
118 | // the contents of the data that is actually read. | |
|
119 | --remaining_bytes_; | |
|
120 | result = | |
|
121 | (result << CHAR_BIT) | data_ptr_[remaining_bytes_]; | |
|
122 | offset += CHAR_BIT; | |
|
123 | } | |
|
124 | ||
|
125 | // Avoid division by 0, in case |range + 1| results in overflow. | |
|
126 | if (range != std::numeric_limits<decltype(range)>::max()) | |
|
127 | result = result % (range + 1); | |
|
128 | ||
|
129 | return static_cast<T>(min + result); | |
|
130 | } | |
|
131 | ||
|
132 | // Returns a std::string of length from 0 to |max_length|. When it runs | |
|
133 | // out of input data, returns what remains of the input. Designed to be | |
|
134 | // more stable with respect to a fuzzer inserting characters than just | |
|
135 | // picking a random length and then consuming that many bytes with | |
|
136 | // |ConsumeBytes|. | |
|
137 | std::string ConsumeRandomLengthString(size_t max_length) | |
|
138 | { | |
|
139 | // Reads bytes from the start of |data_ptr_|. Maps "\\" to "\", | |
|
140 | // and maps "\" followed by anything else to the end of the | |
|
141 | // string. As a result of this logic, a fuzzer can insert | |
|
142 | // characters into the string, and the string will be lengthened | |
|
143 | // to include those new characters, resulting in a more stable | |
|
144 | // fuzzer than picking the length of a string independently from | |
|
145 | // picking its contents. | |
|
146 | std::string result; | |
|
147 | ||
|
148 | // Reserve the anticipated capaticity to prevent several | |
|
149 | // reallocations. | |
|
150 | result.reserve(std::min(max_length, remaining_bytes_)); | |
|
151 | for (size_t i = 0; i < max_length && remaining_bytes_ != 0; | |
|
152 | ++i) { | |
|
153 | char next = ConvertUnsignedToSigned<char>(data_ptr_[0]); | |
|
154 | Advance(1); | |
|
155 | if (next == '\\' && remaining_bytes_ != 0) { | |
|
156 | next = | |
|
157 | ConvertUnsignedToSigned<char>(data_ptr_[0]); | |
|
158 | Advance(1); | |
|
159 | if (next != '\\') | |
|
160 | break; | |
|
161 | } | |
|
162 | result += next; | |
|
163 | } | |
|
164 | ||
|
165 | result.shrink_to_fit(); | |
|
166 | return result; | |
|
167 | } | |
|
168 | ||
|
169 | // Returns a std::vector containing all remaining bytes of the input | |
|
170 | // data. | |
|
171 | template <typename T> std::vector<T> ConsumeRemainingBytes() | |
|
172 | { | |
|
173 | return ConsumeBytes<T>(remaining_bytes_); | |
|
174 | } | |
|
175 | ||
|
176 | // Returns a std::string containing all remaining bytes of the input | |
|
177 | // data. Prefer using |ConsumeRemainingBytes| unless you actually need a | |
|
178 | // std::string object. | |
|
179 | std::string ConsumeRemainingBytesAsString() | |
|
180 | { | |
|
181 | return ConsumeBytesAsString(remaining_bytes_); | |
|
182 | } | |
|
183 | ||
|
184 | // Returns a number in the range [Type's min, Type's max]. The value | |
|
185 | // might not be uniformly distributed in the given range. If there's no | |
|
186 | // input data left, always returns |min|. | |
|
187 | template <typename T> T ConsumeIntegral() | |
|
188 | { | |
|
189 | return ConsumeIntegralInRange(std::numeric_limits<T>::min(), | |
|
190 | std::numeric_limits<T>::max()); | |
|
191 | } | |
|
192 | ||
|
193 | // Reads one byte and returns a bool, or false when no data remains. | |
|
194 | bool ConsumeBool() | |
|
195 | { | |
|
196 | return 1 & ConsumeIntegral<uint8_t>(); | |
|
197 | } | |
|
198 | ||
|
199 | // Returns a copy of the value selected from the given fixed-size | |
|
200 | // |array|. | |
|
201 | template <typename T, size_t size> | |
|
202 | T PickValueInArray(const T (&array)[size]) | |
|
203 | { | |
|
204 | static_assert(size > 0, "The array must be non empty."); | |
|
205 | return array[ConsumeIntegralInRange<size_t>(0, size - 1)]; | |
|
206 | } | |
|
207 | ||
|
208 | template <typename T> | |
|
209 | T PickValueInArray(std::initializer_list<const T> list) | |
|
210 | { | |
|
211 | // TODO(Dor1s): switch to static_assert once C++14 is allowed. | |
|
212 | if (!list.size()) | |
|
213 | abort(); | |
|
214 | ||
|
215 | return *(list.begin() + | |
|
216 | ConsumeIntegralInRange<size_t>(0, list.size() - 1)); | |
|
217 | } | |
|
218 | ||
|
219 | // Returns an enum value. The enum must start at 0 and be contiguous. It | |
|
220 | // must also contain |kMaxValue| aliased to its largest (inclusive) | |
|
221 | // value. Such as: enum class Foo { SomeValue, OtherValue, kMaxValue = | |
|
222 | // OtherValue }; | |
|
223 | template <typename T> T ConsumeEnum() | |
|
224 | { | |
|
225 | static_assert(std::is_enum<T>::value, | |
|
226 | "|T| must be an enum type."); | |
|
227 | return static_cast<T>(ConsumeIntegralInRange<uint32_t>( | |
|
228 | 0, static_cast<uint32_t>(T::kMaxValue))); | |
|
229 | } | |
|
230 | ||
|
231 | // Returns a floating point number in the range [0.0, 1.0]. If there's | |
|
232 | // no input data left, always returns 0. | |
|
233 | template <typename T> T ConsumeProbability() | |
|
234 | { | |
|
235 | static_assert(std::is_floating_point<T>::value, | |
|
236 | "A floating point type is required."); | |
|
237 | ||
|
238 | // Use different integral types for different floating point | |
|
239 | // types in order to provide better density of the resulting | |
|
240 | // values. | |
|
241 | using IntegralType = | |
|
242 | typename std::conditional<(sizeof(T) <= sizeof(uint32_t)), | |
|
243 | uint32_t, uint64_t>::type; | |
|
244 | ||
|
245 | T result = static_cast<T>(ConsumeIntegral<IntegralType>()); | |
|
246 | result /= | |
|
247 | static_cast<T>(std::numeric_limits<IntegralType>::max()); | |
|
248 | return result; | |
|
249 | } | |
|
250 | ||
|
251 | // Returns a floating point value in the range [Type's lowest, Type's | |
|
252 | // max] by consuming bytes from the input data. If there's no input data | |
|
253 | // left, always returns approximately 0. | |
|
254 | template <typename T> T ConsumeFloatingPoint() | |
|
255 | { | |
|
256 | return ConsumeFloatingPointInRange<T>( | |
|
257 | std::numeric_limits<T>::lowest(), | |
|
258 | std::numeric_limits<T>::max()); | |
|
259 | } | |
|
260 | ||
|
261 | // Returns a floating point value in the given range by consuming bytes | |
|
262 | // from the input data. If there's no input data left, returns |min|. | |
|
263 | // Note that |min| must be less than or equal to |max|. | |
|
264 | template <typename T> T ConsumeFloatingPointInRange(T min, T max) | |
|
265 | { | |
|
266 | if (min > max) | |
|
267 | abort(); | |
|
268 | ||
|
269 | T range = .0; | |
|
270 | T result = min; | |
|
271 | constexpr T zero(.0); | |
|
272 | if (max > zero && min < zero && | |
|
273 | max > min + std::numeric_limits<T>::max()) { | |
|
274 | // The diff |max - min| would overflow the given | |
|
275 | // floating point type. Use the half of the diff as the | |
|
276 | // range and consume a bool to decide whether the result | |
|
277 | // is in the first of the second part of the diff. | |
|
278 | range = (max / 2.0) - (min / 2.0); | |
|
279 | if (ConsumeBool()) { | |
|
280 | result += range; | |
|
281 | } | |
|
282 | } else { | |
|
283 | range = max - min; | |
|
284 | } | |
|
285 | ||
|
286 | return result + range * ConsumeProbability<T>(); | |
|
287 | } | |
|
288 | ||
|
289 | // Reports the remaining bytes available for fuzzed input. | |
|
290 | size_t remaining_bytes() | |
|
291 | { | |
|
292 | return remaining_bytes_; | |
|
293 | } | |
|
294 | ||
|
295 | private: | |
|
296 | FuzzedDataProvider(const FuzzedDataProvider &) = delete; | |
|
297 | FuzzedDataProvider &operator=(const FuzzedDataProvider &) = delete; | |
|
298 | ||
|
299 | void Advance(size_t num_bytes) | |
|
300 | { | |
|
301 | if (num_bytes > remaining_bytes_) | |
|
302 | abort(); | |
|
303 | ||
|
304 | data_ptr_ += num_bytes; | |
|
305 | remaining_bytes_ -= num_bytes; | |
|
306 | } | |
|
307 | ||
|
308 | template <typename T> | |
|
309 | std::vector<T> ConsumeBytes(size_t size, size_t num_bytes_to_consume) | |
|
310 | { | |
|
311 | static_assert(sizeof(T) == sizeof(uint8_t), | |
|
312 | "Incompatible data type."); | |
|
313 | ||
|
314 | // The point of using the size-based constructor below is to | |
|
315 | // increase the odds of having a vector object with capacity | |
|
316 | // being equal to the length. That part is always implementation | |
|
317 | // specific, but at least both libc++ and libstdc++ allocate the | |
|
318 | // requested number of bytes in that constructor, which seems to | |
|
319 | // be a natural choice for other implementations as well. To | |
|
320 | // increase the odds even more, we also call |shrink_to_fit| | |
|
321 | // below. | |
|
322 | std::vector<T> result(size); | |
|
323 | if (size == 0) { | |
|
324 | if (num_bytes_to_consume != 0) | |
|
325 | abort(); | |
|
326 | return result; | |
|
327 | } | |
|
328 | ||
|
329 | std::memcpy(result.data(), data_ptr_, num_bytes_to_consume); | |
|
330 | Advance(num_bytes_to_consume); | |
|
331 | ||
|
332 | // Even though |shrink_to_fit| is also implementation specific, | |
|
333 | // we expect it to provide an additional assurance in case | |
|
334 | // vector's constructor allocated a buffer which is larger than | |
|
335 | // the actual amount of data we put inside it. | |
|
336 | result.shrink_to_fit(); | |
|
337 | return result; | |
|
338 | } | |
|
339 | ||
|
340 | template <typename TS, typename TU> TS ConvertUnsignedToSigned(TU value) | |
|
341 | { | |
|
342 | static_assert(sizeof(TS) == sizeof(TU), | |
|
343 | "Incompatible data types."); | |
|
344 | static_assert(!std::numeric_limits<TU>::is_signed, | |
|
345 | "Source type must be unsigned."); | |
|
346 | ||
|
347 | // TODO(Dor1s): change to `if constexpr` once C++17 becomes | |
|
348 | // mainstream. | |
|
349 | if (std::numeric_limits<TS>::is_modulo) | |
|
350 | return static_cast<TS>(value); | |
|
351 | ||
|
352 | // Avoid using implementation-defined unsigned to signer | |
|
353 | // conversions. To learn more, see | |
|
354 | // https://stackoverflow.com/questions/13150449. | |
|
355 | if (value <= std::numeric_limits<TS>::max()) { | |
|
356 | return static_cast<TS>(value); | |
|
357 | } else { | |
|
358 | constexpr auto TS_min = std::numeric_limits<TS>::min(); | |
|
359 | return TS_min + static_cast<char>(value - TS_min); | |
|
360 | } | |
|
361 | } | |
|
362 | ||
|
363 | const uint8_t *data_ptr_; | |
|
364 | size_t remaining_bytes_; | |
|
365 | }; | |
|
366 | ||
|
367 | #endif // LLVM_FUZZER_FUZZED_DATA_PROVIDER_H_ | |
|
368 | // no-check-code since this is from a third party |
@@ -0,0 +1,41 b'' | |||
|
1 | // Copyright 2017 Google Inc. All Rights Reserved. | |
|
2 | // Licensed under the Apache License, Version 2.0 (the "License"); | |
|
3 | ||
|
4 | // Example of a standalone runner for "fuzz targets". | |
|
5 | // It reads all files passed as parameters and feeds their contents | |
|
6 | // one by one into the fuzz target (LLVMFuzzerTestOneInput). | |
|
7 | // This runner does not do any fuzzing, but allows us to run the fuzz target | |
|
8 | // on the test corpus (e.g. "do_stuff_test_data") or on a single file, | |
|
9 | // e.g. the one that comes from a bug report. | |
|
10 | ||
|
11 | #include <cassert> | |
|
12 | #include <fstream> | |
|
13 | #include <iostream> | |
|
14 | #include <vector> | |
|
15 | ||
|
16 | // Forward declare the "fuzz target" interface. | |
|
17 | // We deliberately keep this inteface simple and header-free. | |
|
18 | extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size); | |
|
19 | ||
|
20 | int main(int argc, char **argv) | |
|
21 | { | |
|
22 | for (int i = 1; i < argc; i++) { | |
|
23 | std::ifstream in(argv[i]); | |
|
24 | in.seekg(0, in.end); | |
|
25 | size_t length = in.tellg(); | |
|
26 | in.seekg(0, in.beg); | |
|
27 | std::cout << "Reading " << length << " bytes from " << argv[i] | |
|
28 | << std::endl; | |
|
29 | // Allocate exactly length bytes so that we reliably catch | |
|
30 | // buffer overflows. | |
|
31 | std::vector<char> bytes(length); | |
|
32 | in.read(bytes.data(), bytes.size()); | |
|
33 | assert(in); | |
|
34 | LLVMFuzzerTestOneInput( | |
|
35 | reinterpret_cast<const uint8_t *>(bytes.data()), | |
|
36 | bytes.size()); | |
|
37 | std::cout << "Execution successful" << std::endl; | |
|
38 | } | |
|
39 | return 0; | |
|
40 | } | |
|
41 | // no-check-code since this is from a third party |
@@ -1,7 +1,14 b'' | |||
|
1 | 1 | CC = clang |
|
2 | 2 | CXX = clang++ |
|
3 | 3 | |
|
4 | LIB_FUZZING_ENGINE ?= -lFuzzingEngine | |
|
4 | # By default, use our own standalone_fuzz_target_runner. | |
|
5 | # This runner does no fuzzing, but simply executes the inputs | |
|
6 | # provided via parameters. | |
|
7 | # Run e.g. "make all LIB_FUZZING_ENGINE=/path/to/libFuzzer.a" | |
|
8 | # to link the fuzzer(s) against a real fuzzing engine. | |
|
9 | # | |
|
10 | # OSS-Fuzz will define its own value for LIB_FUZZING_ENGINE. | |
|
11 | LIB_FUZZING_ENGINE ?= standalone_fuzz_target_runner.o | |
|
5 | 12 | |
|
6 | 13 | PYTHON_CONFIG ?= $$OUT/sanpy/bin/python-config |
|
7 | 14 | |
@@ -9,6 +16,8 b' CXXFLAGS += -Wno-deprecated-register' | |||
|
9 | 16 | |
|
10 | 17 | all: bdiff mpatch xdiff |
|
11 | 18 | |
|
19 | standalone_fuzz_target_runner.o: standalone_fuzz_target_runner.cc | |
|
20 | ||
|
12 | 21 | pyutil.o: pyutil.cc pyutil.h |
|
13 | 22 | $(CXX) $(CXXFLAGS) -g -O1 \ |
|
14 | 23 | `$(PYTHON_CONFIG) --cflags` \ |
@@ -9,7 +9,7 b'' | |||
|
9 | 9 | #include <memory> |
|
10 | 10 | #include <stdlib.h> |
|
11 | 11 | |
|
12 |
#include |
|
|
12 | #include "FuzzedDataProvider.h" | |
|
13 | 13 | |
|
14 | 14 | extern "C" { |
|
15 | 15 | #include "bdiff.h" |
@@ -5,9 +5,9 b'' | |||
|
5 | 5 | |
|
6 | 6 | #include "pyutil.h" |
|
7 | 7 | |
|
8 | #include <fuzzer/FuzzedDataProvider.h> | |
|
9 | 8 | #include <iostream> |
|
10 | 9 | #include <string> |
|
10 | #include "FuzzedDataProvider.h" | |
|
11 | 11 | |
|
12 | 12 | extern "C" { |
|
13 | 13 |
@@ -10,7 +10,7 b'' | |||
|
10 | 10 | #include <inttypes.h> |
|
11 | 11 | #include <stdlib.h> |
|
12 | 12 | |
|
13 |
#include |
|
|
13 | #include "FuzzedDataProvider.h" | |
|
14 | 14 | |
|
15 | 15 | extern "C" { |
|
16 | 16 |
@@ -21,6 +21,8 b' New errors are not allowed. Warnings are' | |||
|
21 | 21 | Skipping contrib/automation/hgautomation/try_server.py it has no-che?k-code (glob) |
|
22 | 22 | Skipping contrib/automation/hgautomation/windows.py it has no-che?k-code (glob) |
|
23 | 23 | Skipping contrib/automation/hgautomation/winrm.py it has no-che?k-code (glob) |
|
24 | Skipping contrib/fuzz/FuzzedDataProvider.h it has no-che?k-code (glob) | |
|
25 | Skipping contrib/fuzz/standalone_fuzz_target_runner.cc it has no-che?k-code (glob) | |
|
24 | 26 | Skipping contrib/packaging/hgpackaging/cli.py it has no-che?k-code (glob) |
|
25 | 27 | Skipping contrib/packaging/hgpackaging/downloads.py it has no-che?k-code (glob) |
|
26 | 28 | Skipping contrib/packaging/hgpackaging/inno.py it has no-che?k-code (glob) |
General Comments 0
You need to be logged in to leave comments.
Login now