##// END OF EJS Templates
rust-re2: add wrapper for calling Re2 from Rust...
Raphaël Gomès -
r44786:d8d4fa9a default
parent child Browse files
Show More
@@ -0,0 +1,25 b''
1 // build.rs
2 //
3 // Copyright 2020 Raphaël Gomès <rgomes@octobus.net>
4 //
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
7
8 #[cfg(feature = "with-re2")]
9 use cc;
10
/// Compiles the C++ shim `src/re2/rust_re2.cpp` through the `cc` crate and
/// tells Cargo to link the resulting crate against the `re2` library.
///
/// Only compiled when the `with-re2` feature is enabled.
#[cfg(feature = "with-re2")]
fn compile_re2() {
    let mut shim = cc::Build::new();
    // The shim is C++ (it includes `re2/re2.h`), built as C++11.
    shim.cpp(true);
    shim.flag("-std=c++11");
    shim.file("src/re2/rust_re2.cpp");
    shim.compile("librustre.a");

    println!("cargo:rustc-link-lib=re2");
}
21
/// Build-script entry point.
///
/// Does nothing unless the `with-re2` feature is enabled, in which case the
/// Re2 C++ shim is compiled.
fn main() {
    #[cfg(feature = "with-re2")]
    {
        compile_re2();
    }
}
@@ -0,0 +1,21 b''
1 /// re2 module
2 ///
3 /// The Python implementation of Mercurial uses the Re2 regex engine when
4 /// possible and if the bindings are installed, falling back to Python's `re`
5 /// in case of unsupported syntax (Re2 is a non-backtracking engine).
6 ///
7 /// Using it from Rust is not ideal. We need C++ bindings, a C++ compiler,
8 /// Re2 needs to be installed... why not just use the `regex` crate?
9 ///
10 /// Using Re2 from the Rust implementation guarantees backwards compatibility.
11 /// We know it will work out of the box without needing to figure out the
12 /// subtle differences in syntax. For example, `regex` currently does not
13 /// support empty alternations (regex like `a||b`), which happen more often
14 /// than we might think. Old benchmarks also showed worse performance from
15 /// regex than with Re2, but the methodology and results were lost, so take
16 /// this with a grain of salt.
17 ///
18 /// The idea is to use Re2 for now as a temporary phase and then investigate
19 /// how much work would be needed to use `regex`.
20 mod re2;
21 pub use re2::Re2;
@@ -0,0 +1,66 b''
1 /*
2 re2.rs
3
4 Rust FFI bindings to Re2.
5
6 Copyright 2020 Valentin Gatien-Baron
7
8 This software may be used and distributed according to the terms of the
9 GNU General Public License version 2 or any later version.
10 */
11 use libc::{c_int, c_void};
12
13 type Re2Ptr = *const c_void;
14
15 pub struct Re2(Re2Ptr);
16
17 /// `re2.h` says:
18 /// "An "RE2" object is safe for concurrent use by multiple threads."
19 unsafe impl Sync for Re2 {}
20
21 /// These bind to the C ABI in `rust_re2.cpp`.
22 extern "C" {
23 fn rust_re2_create(data: *const u8, len: usize) -> Re2Ptr;
24 fn rust_re2_destroy(re2: Re2Ptr);
25 fn rust_re2_ok(re2: Re2Ptr) -> bool;
26 fn rust_re2_error(
27 re2: Re2Ptr,
28 outdata: *mut *const u8,
29 outlen: *mut usize,
30 ) -> bool;
31 fn rust_re2_match(
32 re2: Re2Ptr,
33 data: *const u8,
34 len: usize,
35 anchor: c_int,
36 ) -> bool;
37 }
38
39 impl Re2 {
40 pub fn new(pattern: &[u8]) -> Result<Re2, String> {
41 unsafe {
42 let re2 = rust_re2_create(pattern.as_ptr(), pattern.len());
43 if rust_re2_ok(re2) {
44 Ok(Re2(re2))
45 } else {
46 let mut data: *const u8 = std::ptr::null();
47 let mut len: usize = 0;
48 rust_re2_error(re2, &mut data, &mut len);
49 Err(String::from_utf8_lossy(std::slice::from_raw_parts(
50 data, len,
51 ))
52 .to_string())
53 }
54 }
55 }
56
57 pub fn is_match(&self, data: &[u8]) -> bool {
58 unsafe { rust_re2_match(self.0, data.as_ptr(), data.len(), 1) }
59 }
60 }
61
62 impl Drop for Re2 {
63 fn drop(&mut self) {
64 unsafe { rust_re2_destroy(self.0) }
65 }
66 }
@@ -0,0 +1,49 b''
1 /*
2 rust_re2.cpp
3
4 C ABI export of Re2's C++ interface for Rust FFI.
5
6 Copyright 2020 Valentin Gatien-Baron
7
8 This software may be used and distributed according to the terms of the
9 GNU General Public License version 2 or any later version.
10 */
11
12 #include <re2/re2.h>
13 using namespace re2;
14
15 extern "C" {
16 RE2* rust_re2_create(const char* data, size_t len) {
17 RE2::Options o;
18 o.set_encoding(RE2::Options::Encoding::EncodingLatin1);
19 o.set_log_errors(false);
20 o.set_max_mem(50000000);
21
22 return new RE2(StringPiece(data, len), o);
23 }
24
25 void rust_re2_destroy(RE2* re) {
26 delete re;
27 }
28
29 bool rust_re2_ok(RE2* re) {
30 return re->ok();
31 }
32
33 void rust_re2_error(RE2* re, const char** outdata, size_t* outlen) {
34 const std::string& e = re->error();
35 *outdata = e.data();
36 *outlen = e.length();
37 }
38
39 bool rust_re2_match(RE2* re, char* data, size_t len, int ianchor) {
40 const StringPiece sp = StringPiece(data, len);
41
42 RE2::Anchor anchor =
43 ianchor == 0 ? RE2::Anchor::UNANCHORED :
44 (ianchor == 1 ? RE2::Anchor::ANCHOR_START :
45 RE2::Anchor::ANCHOR_BOTH);
46
47 return re->Match(sp, 0, len, anchor, NULL, 0);
48 }
49 }
@@ -45,6 +45,11 b' dependencies = ['
45 45 ]
46 46
47 47 [[package]]
48 name = "cc"
49 version = "1.0.50"
50 source = "registry+https://github.com/rust-lang/crates.io-index"
51
52 [[package]]
48 53 name = "cfg-if"
49 54 version = "0.1.10"
50 55 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -161,8 +166,10 b' name = "hg-core"'
161 166 version = "0.1.0"
162 167 dependencies = [
163 168 "byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
169 "cc 1.0.50 (registry+https://github.com/rust-lang/crates.io-index)",
164 170 "hex 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
165 171 "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
172 "libc 0.2.66 (registry+https://github.com/rust-lang/crates.io-index)",
166 173 "memchr 2.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
167 174 "pretty_assertions 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)",
168 175 "rand 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -599,6 +606,7 b' source = "registry+https://github.com/ru'
599 606 "checksum bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693"
600 607 "checksum byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a7c3dd8985a7111efc5c80b44e23ecdd8c007de8ade3b96595387e812b957cf5"
601 608 "checksum c2-chacha 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "214238caa1bf3a496ec3392968969cab8549f96ff30652c9e56885329315f6bb"
609 "checksum cc 1.0.50 (registry+https://github.com/rust-lang/crates.io-index)" = "95e28fa049fda1c330bcf9d723be7663a899c4679724b34c81e9f5a326aab8cd"
602 610 "checksum cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)" = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822"
603 611 "checksum cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f"
604 612 "checksum cpython 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "bfaf3847ab963e40c4f6dd8d6be279bdf74007ae2413786a0dcbb28c52139a95"
@@ -4,6 +4,7 b' version = "0.1.0"'
4 4 authors = ["Georges Racinet <gracinet@anybox.fr>"]
5 5 description = "Mercurial pure Rust core library, with no assumption on Python bindings (FFI)"
6 6 edition = "2018"
7 build = "build.rs"
7 8
8 9 [lib]
9 10 name = "hg"
@@ -12,6 +13,7 b' name = "hg"'
12 13 byteorder = "1.3.1"
13 14 hex = "0.4.0"
14 15 lazy_static = "1.3.0"
16 libc = { version = "0.2.66", optional = true }
15 17 memchr = "2.2.0"
16 18 rand = "0.6.5"
17 19 rand_pcg = "0.1.1"
@@ -22,4 +24,11 b' same-file = "1.0.6"'
22 24
23 25 [dev-dependencies]
24 26 tempfile = "3.1.0"
25 pretty_assertions = "0.6.1" No newline at end of file
27 pretty_assertions = "0.6.1"
28
29 [build-dependencies]
30 cc = { version = "1.0.48", optional = true }
31
32 [features]
33 default = []
34 with-re2 = ["cc", "libc"]
@@ -21,6 +21,8 b' mod filepatterns;'
21 21 pub mod matchers;
22 22 pub mod revlog;
23 23 pub use revlog::*;
24 #[cfg(feature = "with-re2")]
25 pub mod re2;
24 26 pub mod utils;
25 27
26 28 use crate::utils::hg_path::{HgPathBuf, HgPathError};
@@ -10,6 +10,7 b' crate-type = ["cdylib"]'
10 10
11 11 [features]
12 12 default = ["python27"]
13 with-re2 = ["hg-core/with-re2"]
13 14
14 15 # Features to build an extension module:
15 16 python27 = ["cpython/python27-sys", "cpython/extension-module-2-7"]
@@ -1351,10 +1351,19 b' class RustExtension(Extension):'
1351 1351 env['HOME'] = pwd.getpwuid(os.getuid()).pw_dir
1352 1352
1353 1353 cargocmd = ['cargo', 'rustc', '-vv', '--release']
1354
1355 feature_flags = []
1356
1354 1357 if sys.version_info[0] == 3 and self.py3_features is not None:
1355 cargocmd.extend(
1356 ('--features', self.py3_features, '--no-default-features')
1357 )
1358 feature_flags.append(self.py3_features)
1359 cargocmd.append('--no-default-features')
1360
1361 rust_features = env.get("HG_RUST_FEATURES")
1362 if rust_features:
1363 feature_flags.append(rust_features)
1364
1365 cargocmd.extend(('--features', " ".join(feature_flags)))
1366
1358 1367 cargocmd.append('--')
1359 1368 if sys.platform == 'darwin':
1360 1369 cargocmd.extend(
General Comments 0
You need to be logged in to leave comments. Login now