Show More
@@ -0,0 +1,25 b'' | |||
|
1 | // build.rs | |
|
2 | // | |
|
3 | // Copyright 2020 Raphaël Gomès <rgomes@octobus.net> | |
|
4 | // | |
|
5 | // This software may be used and distributed according to the terms of the | |
|
6 | // GNU General Public License version 2 or any later version. | |
|
7 | ||
|
8 | #[cfg(feature = "with-re2")] | |
|
9 | use cc; | |
|
10 | ||
|
/// Compiles the C++ shim in `src/re2/rust_re2.cpp` into a static library and
/// tells Cargo to link a library named `re2`.
///
/// This function only exists when the `with-re2` feature is enabled.
#[cfg(feature = "with-re2")]
fn compile_re2() {
    // Configure the C++ build first, then run it as a separate step.
    let mut shim = cc::Build::new();
    shim.cpp(true)
        .flag("-std=c++11")
        .file("src/re2/rust_re2.cpp");
    shim.compile("librustre.a");

    println!("cargo:rustc-link-lib=re2");
}
|
21 | ||
|
/// Build-script entry point.
///
/// The Re2 shim is compiled only when the `with-re2` feature is enabled;
/// without that feature this function does nothing.
fn main() {
    #[cfg(feature = "with-re2")]
    compile_re2();
}
@@ -0,0 +1,21 b'' | |||
|
1 | /// re2 module | |
|
2 | /// | |
|
3 | /// The Python implementation of Mercurial uses the Re2 regex engine when | |
|
4 | /// possible and if the bindings are installed, falling back to Python's `re` | |
|
5 | /// in case of unsupported syntax (Re2 is a non-backtracking engine). | |
|
6 | /// | |
|
7 | /// Using it from Rust is not ideal. We need C++ bindings, a C++ compiler, | |
|
8 | /// Re2 needs to be installed... why not just use the `regex` crate? | |
|
9 | /// | |
|
10 | /// Using Re2 from the Rust implementation guarantees backwards compatibility. | |
|
11 | /// We know it will work out of the box without needing to figure out the | |
|
12 | /// subtle differences in syntax. For example, `regex` currently does not | |
|
13 | /// support empty alternations (regex like `a||b`) which happens more often | |
|
14 | /// than we might think. Old benchmarks also showed worse performance from | |
|
15 | /// regex than with Re2, but the methodology and results were lost, so take | |
|
16 | /// this with a grain of salt. | |
|
17 | /// | |
|
18 | /// The idea is to use Re2 for now as a temporary phase and then investigate | |
|
19 | /// how much work would be needed to use `regex`. | |
|
20 | mod re2; | |
|
21 | pub use re2::Re2; |
@@ -0,0 +1,66 b'' | |||
|
1 | /* | |
|
2 | re2.rs | |
|
3 | ||
|
4 | Rust FFI bindings to Re2. | |
|
5 | ||
|
6 | Copyright 2020 Valentin Gatien-Baron | |
|
7 | ||
|
8 | This software may be used and distributed according to the terms of the | |
|
9 | GNU General Public License version 2 or any later version. | |
|
10 | */ | |
|
11 | use libc::{c_int, c_void}; | |
|
12 | ||
|
/// Untyped pointer to an `RE2` object that lives on the C++ side.
type Re2Ptr = *const c_void;

/// Wrapper around a pointer to a compiled Re2 regular expression.
pub struct Re2(Re2Ptr);

// SAFETY: `re2.h` says:
// "An "RE2" object is safe for concurrent use by multiple threads."
unsafe impl Sync for Re2 {}
|
20 | ||
|
21 | /// These bind to the C ABI in `rust_re2.cpp`. | |
|
22 | extern "C" { | |
|
23 | fn rust_re2_create(data: *const u8, len: usize) -> Re2Ptr; | |
|
24 | fn rust_re2_destroy(re2: Re2Ptr); | |
|
25 | fn rust_re2_ok(re2: Re2Ptr) -> bool; | |
|
26 | fn rust_re2_error( | |
|
27 | re2: Re2Ptr, | |
|
28 | outdata: *mut *const u8, | |
|
29 | outlen: *mut usize, | |
|
30 | ) -> bool; | |
|
31 | fn rust_re2_match( | |
|
32 | re2: Re2Ptr, | |
|
33 | data: *const u8, | |
|
34 | len: usize, | |
|
35 | anchor: c_int, | |
|
36 | ) -> bool; | |
|
37 | } | |
|
38 | ||
|
39 | impl Re2 { | |
|
40 | pub fn new(pattern: &[u8]) -> Result<Re2, String> { | |
|
41 | unsafe { | |
|
42 | let re2 = rust_re2_create(pattern.as_ptr(), pattern.len()); | |
|
43 | if rust_re2_ok(re2) { | |
|
44 | Ok(Re2(re2)) | |
|
45 | } else { | |
|
46 | let mut data: *const u8 = std::ptr::null(); | |
|
47 | let mut len: usize = 0; | |
|
48 | rust_re2_error(re2, &mut data, &mut len); | |
|
49 | Err(String::from_utf8_lossy(std::slice::from_raw_parts( | |
|
50 | data, len, | |
|
51 | )) | |
|
52 | .to_string()) | |
|
53 | } | |
|
54 | } | |
|
55 | } | |
|
56 | ||
|
57 | pub fn is_match(&self, data: &[u8]) -> bool { | |
|
58 | unsafe { rust_re2_match(self.0, data.as_ptr(), data.len(), 1) } | |
|
59 | } | |
|
60 | } | |
|
61 | ||
|
62 | impl Drop for Re2 { | |
|
63 | fn drop(&mut self) { | |
|
64 | unsafe { rust_re2_destroy(self.0) } | |
|
65 | } | |
|
66 | } |
@@ -0,0 +1,49 b'' | |||
|
1 | /* | |
|
2 | rust_re2.cpp | |
|
3 | ||
|
4 | C ABI export of Re2's C++ interface for Rust FFI. | |
|
5 | ||
|
6 | Copyright 2020 Valentin Gatien-Baron | |
|
7 | ||
|
8 | This software may be used and distributed according to the terms of the | |
|
9 | GNU General Public License version 2 or any later version. | |
|
10 | */ | |
|
11 | ||
|
12 | #include <re2/re2.h> | |
|
13 | using namespace re2; | |
|
14 | ||
|
15 | extern "C" { | |
|
16 | RE2* rust_re2_create(const char* data, size_t len) { | |
|
17 | RE2::Options o; | |
|
18 | o.set_encoding(RE2::Options::Encoding::EncodingLatin1); | |
|
19 | o.set_log_errors(false); | |
|
20 | o.set_max_mem(50000000); | |
|
21 | ||
|
22 | return new RE2(StringPiece(data, len), o); | |
|
23 | } | |
|
24 | ||
|
25 | void rust_re2_destroy(RE2* re) { | |
|
26 | delete re; | |
|
27 | } | |
|
28 | ||
|
29 | bool rust_re2_ok(RE2* re) { | |
|
30 | return re->ok(); | |
|
31 | } | |
|
32 | ||
|
33 | void rust_re2_error(RE2* re, const char** outdata, size_t* outlen) { | |
|
34 | const std::string& e = re->error(); | |
|
35 | *outdata = e.data(); | |
|
36 | *outlen = e.length(); | |
|
37 | } | |
|
38 | ||
|
39 | bool rust_re2_match(RE2* re, char* data, size_t len, int ianchor) { | |
|
40 | const StringPiece sp = StringPiece(data, len); | |
|
41 | ||
|
42 | RE2::Anchor anchor = | |
|
43 | ianchor == 0 ? RE2::Anchor::UNANCHORED : | |
|
44 | (ianchor == 1 ? RE2::Anchor::ANCHOR_START : | |
|
45 | RE2::Anchor::ANCHOR_BOTH); | |
|
46 | ||
|
47 | return re->Match(sp, 0, len, anchor, NULL, 0); | |
|
48 | } | |
|
49 | } |
@@ -45,6 +45,11 b' dependencies = [' | |||
|
45 | 45 | ] |
|
46 | 46 | |
|
47 | 47 | [[package]] |
|
48 | name = "cc" | |
|
49 | version = "1.0.50" | |
|
50 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
|
51 | ||
|
52 | [[package]] | |
|
48 | 53 | name = "cfg-if" |
|
49 | 54 | version = "0.1.10" |
|
50 | 55 | source = "registry+https://github.com/rust-lang/crates.io-index" |
@@ -161,8 +166,10 b' name = "hg-core"' | |||
|
161 | 166 | version = "0.1.0" |
|
162 | 167 | dependencies = [ |
|
163 | 168 | "byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)", |
|
169 | "cc 1.0.50 (registry+https://github.com/rust-lang/crates.io-index)", | |
|
164 | 170 | "hex 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", |
|
165 | 171 | "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", |
|
172 | "libc 0.2.66 (registry+https://github.com/rust-lang/crates.io-index)", | |
|
166 | 173 | "memchr 2.3.0 (registry+https://github.com/rust-lang/crates.io-index)", |
|
167 | 174 | "pretty_assertions 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)", |
|
168 | 175 | "rand 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)", |
@@ -599,6 +606,7 b' source = "registry+https://github.com/ru' | |||
|
599 | 606 | "checksum bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" |
|
600 | 607 | "checksum byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a7c3dd8985a7111efc5c80b44e23ecdd8c007de8ade3b96595387e812b957cf5" |
|
601 | 608 | "checksum c2-chacha 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "214238caa1bf3a496ec3392968969cab8549f96ff30652c9e56885329315f6bb" |
|
609 | "checksum cc 1.0.50 (registry+https://github.com/rust-lang/crates.io-index)" = "95e28fa049fda1c330bcf9d723be7663a899c4679724b34c81e9f5a326aab8cd" | |
|
602 | 610 | "checksum cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)" = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" |
|
603 | 611 | "checksum cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f" |
|
604 | 612 | "checksum cpython 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "bfaf3847ab963e40c4f6dd8d6be279bdf74007ae2413786a0dcbb28c52139a95" |
@@ -4,6 +4,7 b' version = "0.1.0"' | |||
|
4 | 4 | authors = ["Georges Racinet <gracinet@anybox.fr>"] |
|
5 | 5 | description = "Mercurial pure Rust core library, with no assumption on Python bindings (FFI)" |
|
6 | 6 | edition = "2018" |
|
7 | build = "build.rs" | |
|
7 | 8 | |
|
8 | 9 | [lib] |
|
9 | 10 | name = "hg" |
@@ -12,6 +13,7 b' name = "hg"' | |||
|
12 | 13 | byteorder = "1.3.1" |
|
13 | 14 | hex = "0.4.0" |
|
14 | 15 | lazy_static = "1.3.0" |
|
16 | libc = { version = "0.2.66", optional = true } | |
|
15 | 17 | memchr = "2.2.0" |
|
16 | 18 | rand = "0.6.5" |
|
17 | 19 | rand_pcg = "0.1.1" |
@@ -22,4 +24,11 b' same-file = "1.0.6"' | |||
|
22 | 24 | |
|
23 | 25 | [dev-dependencies] |
|
24 | 26 | tempfile = "3.1.0" |
|
25 | pretty_assertions = "0.6.1" No newline at end of file | |
|
27 | pretty_assertions = "0.6.1" | |
|
28 | ||
|
29 | [build-dependencies] | |
|
30 | cc = { version = "1.0.48", optional = true } | |
|
31 | ||
|
32 | [features] | |
|
33 | default = [] | |
|
34 | with-re2 = ["cc", "libc"] |
@@ -21,6 +21,8 b' mod filepatterns;' | |||
|
21 | 21 | pub mod matchers; |
|
22 | 22 | pub mod revlog; |
|
23 | 23 | pub use revlog::*; |
|
24 | #[cfg(feature = "with-re2")] | |
|
25 | pub mod re2; | |
|
24 | 26 | pub mod utils; |
|
25 | 27 | |
|
26 | 28 | use crate::utils::hg_path::{HgPathBuf, HgPathError}; |
@@ -10,6 +10,7 b' crate-type = ["cdylib"]' | |||
|
10 | 10 | |
|
11 | 11 | [features] |
|
12 | 12 | default = ["python27"] |
|
13 | with-re2 = ["hg-core/with-re2"] | |
|
13 | 14 | |
|
14 | 15 | # Features to build an extension module: |
|
15 | 16 | python27 = ["cpython/python27-sys", "cpython/extension-module-2-7"] |
@@ -21,7 +22,7 b' python27-bin = ["cpython/python27-sys"]' | |||
|
21 | 22 | python3-bin = ["cpython/python3-sys"] |
|
22 | 23 | |
|
23 | 24 | [dependencies] |
|
24 |
hg-core = { path = "../hg-core" |
|
|
25 | hg-core = { path = "../hg-core"} | |
|
25 | 26 | libc = '*' |
|
26 | 27 | |
|
27 | 28 | [dependencies.cpython] |
@@ -1351,10 +1351,19 b' class RustExtension(Extension):' | |||
|
1351 | 1351 | env['HOME'] = pwd.getpwuid(os.getuid()).pw_dir |
|
1352 | 1352 | |
|
1353 | 1353 | cargocmd = ['cargo', 'rustc', '-vv', '--release'] |
|
1354 | ||
|
1355 | feature_flags = [] | |
|
1356 | ||
|
1354 | 1357 | if sys.version_info[0] == 3 and self.py3_features is not None: |
|
1355 | cargocmd.extend( | |
|
1356 | ('--features', self.py3_features, '--no-default-features') | |
|
1357 | ) | |
|
1358 | feature_flags.append(self.py3_features) | |
|
1359 | cargocmd.append('--no-default-features') | |
|
1360 | ||
|
1361 | rust_features = env.get("HG_RUST_FEATURES") | |
|
1362 | if rust_features: | |
|
1363 | feature_flags.append(rust_features) | |
|
1364 | ||
|
1365 | cargocmd.extend(('--features', " ".join(feature_flags))) | |
|
1366 | ||
|
1358 | 1367 | cargocmd.append('--') |
|
1359 | 1368 | if sys.platform == 'darwin': |
|
1360 | 1369 | cargocmd.extend( |
General Comments 0
You need to be logged in to leave comments.
Login now