Show More
@@ -0,0 +1,25 b'' | |||||
|
1 | // build.rs | |||
|
2 | // | |||
|
3 | // Copyright 2020 Raphaël Gomès <rgomes@octobus.net> | |||
|
4 | // | |||
|
5 | // This software may be used and distributed according to the terms of the | |||
|
6 | // GNU General Public License version 2 or any later version. | |||
|
7 | ||||
|
8 | #[cfg(feature = "with-re2")] | |||
|
9 | use cc; | |||
|
10 | ||||
|
/// Builds the C++ shim in `src/re2/rust_re2.cpp` as a static library and
/// asks Cargo to link against the system `re2` library.
///
/// Only compiled when the `with-re2` feature is enabled.
#[cfg(feature = "with-re2")]
fn compile_re2() {
    let mut shim = cc::Build::new();
    shim.cpp(true);
    shim.flag("-std=c++11");
    shim.file("src/re2/rust_re2.cpp");
    shim.compile("librustre.a");

    // The shim only wraps Re2; the library itself must also be linked in.
    println!("cargo:rustc-link-lib=re2");
}
|
21 | ||||
|
/// Build-script entry point: compiles the Re2 shim when the `with-re2`
/// feature is enabled and does nothing otherwise.
fn main() {
    #[cfg(feature = "with-re2")]
    {
        compile_re2();
    }
}
@@ -0,0 +1,21 b'' | |||||
|
1 | /// re2 module | |||
|
2 | /// | |||
|
3 | /// The Python implementation of Mercurial uses the Re2 regex engine when | |||
|
4 | /// possible and if the bindings are installed, falling back to Python's `re` | |||
|
5 | /// in case of unsupported syntax (Re2 is a non-backtracking engine). | |||
|
6 | /// | |||
|
7 | /// Using it from Rust is not ideal. We need C++ bindings, a C++ compiler, | |||
|
8 | /// Re2 needs to be installed... why not just use the `regex` crate? | |||
|
9 | /// | |||
|
10 | /// Using Re2 from the Rust implementation guarantees backwards compatibility. | |||
|
11 | /// We know it will work out of the box without needing to figure out the | |||
|
12 | /// subtle differences in syntax. For example, `regex` currently does not | |||
|
13 | /// support empty alternations (regex like `a||b`) which happens more often | |||
|
14 | /// than we might think. Old benchmarks also showed worse performance from | |||
|
15 | /// regex than with Re2, but the methodology and results were lost, so take | |||
|
16 | /// this with a grain of salt. | |||
|
17 | /// | |||
|
18 | /// The idea is to use Re2 for now as a temporary phase and then investigate | |||
|
19 | /// how much work would be needed to use `regex`. | |||
|
20 | mod re2; | |||
|
21 | pub use re2::Re2; |
@@ -0,0 +1,66 b'' | |||||
|
1 | /* | |||
|
2 | re2.rs | |||
|
3 | ||||
|
4 | Rust FFI bindings to Re2. | |||
|
5 | ||||
|
6 | Copyright 2020 Valentin Gatien-Baron | |||
|
7 | ||||
|
8 | This software may be used and distributed according to the terms of the | |||
|
9 | GNU General Public License version 2 or any later version. | |||
|
10 | */ | |||
|
11 | use libc::{c_int, c_void}; | |||
|
12 | ||||
|
13 | type Re2Ptr = *const c_void; | |||
|
14 | ||||
|
15 | pub struct Re2(Re2Ptr); | |||
|
16 | ||||
|
17 | /// `re2.h` says: | |||
|
18 | /// "An "RE2" object is safe for concurrent use by multiple threads." | |||
|
19 | unsafe impl Sync for Re2 {} | |||
|
20 | ||||
|
21 | /// These bind to the C ABI in `rust_re2.cpp`. | |||
|
22 | extern "C" { | |||
|
23 | fn rust_re2_create(data: *const u8, len: usize) -> Re2Ptr; | |||
|
24 | fn rust_re2_destroy(re2: Re2Ptr); | |||
|
25 | fn rust_re2_ok(re2: Re2Ptr) -> bool; | |||
|
26 | fn rust_re2_error( | |||
|
27 | re2: Re2Ptr, | |||
|
28 | outdata: *mut *const u8, | |||
|
29 | outlen: *mut usize, | |||
|
30 | ) -> bool; | |||
|
31 | fn rust_re2_match( | |||
|
32 | re2: Re2Ptr, | |||
|
33 | data: *const u8, | |||
|
34 | len: usize, | |||
|
35 | anchor: c_int, | |||
|
36 | ) -> bool; | |||
|
37 | } | |||
|
38 | ||||
|
39 | impl Re2 { | |||
|
40 | pub fn new(pattern: &[u8]) -> Result<Re2, String> { | |||
|
41 | unsafe { | |||
|
42 | let re2 = rust_re2_create(pattern.as_ptr(), pattern.len()); | |||
|
43 | if rust_re2_ok(re2) { | |||
|
44 | Ok(Re2(re2)) | |||
|
45 | } else { | |||
|
46 | let mut data: *const u8 = std::ptr::null(); | |||
|
47 | let mut len: usize = 0; | |||
|
48 | rust_re2_error(re2, &mut data, &mut len); | |||
|
49 | Err(String::from_utf8_lossy(std::slice::from_raw_parts( | |||
|
50 | data, len, | |||
|
51 | )) | |||
|
52 | .to_string()) | |||
|
53 | } | |||
|
54 | } | |||
|
55 | } | |||
|
56 | ||||
|
57 | pub fn is_match(&self, data: &[u8]) -> bool { | |||
|
58 | unsafe { rust_re2_match(self.0, data.as_ptr(), data.len(), 1) } | |||
|
59 | } | |||
|
60 | } | |||
|
61 | ||||
|
62 | impl Drop for Re2 { | |||
|
63 | fn drop(&mut self) { | |||
|
64 | unsafe { rust_re2_destroy(self.0) } | |||
|
65 | } | |||
|
66 | } |
@@ -0,0 +1,49 b'' | |||||
|
1 | /* | |||
|
2 | rust_re2.cpp | |||
|
3 | ||||
|
4 | C ABI export of Re2's C++ interface for Rust FFI. | |||
|
5 | ||||
|
6 | Copyright 2020 Valentin Gatien-Baron | |||
|
7 | ||||
|
8 | This software may be used and distributed according to the terms of the | |||
|
9 | GNU General Public License version 2 or any later version. | |||
|
10 | */ | |||
|
11 | ||||
|
12 | #include <re2/re2.h> | |||
|
13 | using namespace re2; | |||
|
14 | ||||
|
15 | extern "C" { | |||
|
16 | RE2* rust_re2_create(const char* data, size_t len) { | |||
|
17 | RE2::Options o; | |||
|
18 | o.set_encoding(RE2::Options::Encoding::EncodingLatin1); | |||
|
19 | o.set_log_errors(false); | |||
|
20 | o.set_max_mem(50000000); | |||
|
21 | ||||
|
22 | return new RE2(StringPiece(data, len), o); | |||
|
23 | } | |||
|
24 | ||||
|
25 | void rust_re2_destroy(RE2* re) { | |||
|
26 | delete re; | |||
|
27 | } | |||
|
28 | ||||
|
29 | bool rust_re2_ok(RE2* re) { | |||
|
30 | return re->ok(); | |||
|
31 | } | |||
|
32 | ||||
|
33 | void rust_re2_error(RE2* re, const char** outdata, size_t* outlen) { | |||
|
34 | const std::string& e = re->error(); | |||
|
35 | *outdata = e.data(); | |||
|
36 | *outlen = e.length(); | |||
|
37 | } | |||
|
38 | ||||
|
39 | bool rust_re2_match(RE2* re, char* data, size_t len, int ianchor) { | |||
|
40 | const StringPiece sp = StringPiece(data, len); | |||
|
41 | ||||
|
42 | RE2::Anchor anchor = | |||
|
43 | ianchor == 0 ? RE2::Anchor::UNANCHORED : | |||
|
44 | (ianchor == 1 ? RE2::Anchor::ANCHOR_START : | |||
|
45 | RE2::Anchor::ANCHOR_BOTH); | |||
|
46 | ||||
|
47 | return re->Match(sp, 0, len, anchor, NULL, 0); | |||
|
48 | } | |||
|
49 | } |
@@ -45,6 +45,11 b' dependencies = [' | |||||
45 | ] |
|
45 | ] | |
46 |
|
46 | |||
47 | [[package]] |
|
47 | [[package]] | |
|
48 | name = "cc" | |||
|
49 | version = "1.0.50" | |||
|
50 | source = "registry+https://github.com/rust-lang/crates.io-index" | |||
|
51 | ||||
|
52 | [[package]] | |||
48 | name = "cfg-if" |
|
53 | name = "cfg-if" | |
49 | version = "0.1.10" |
|
54 | version = "0.1.10" | |
50 | source = "registry+https://github.com/rust-lang/crates.io-index" |
|
55 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
@@ -161,8 +166,10 b' name = "hg-core"' | |||||
161 | version = "0.1.0" |
|
166 | version = "0.1.0" | |
162 | dependencies = [ |
|
167 | dependencies = [ | |
163 | "byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)", |
|
168 | "byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)", | |
|
169 | "cc 1.0.50 (registry+https://github.com/rust-lang/crates.io-index)", | |||
164 | "hex 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", |
|
170 | "hex 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", | |
165 | "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", |
|
171 | "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", | |
|
172 | "libc 0.2.66 (registry+https://github.com/rust-lang/crates.io-index)", | |||
166 | "memchr 2.3.0 (registry+https://github.com/rust-lang/crates.io-index)", |
|
173 | "memchr 2.3.0 (registry+https://github.com/rust-lang/crates.io-index)", | |
167 | "pretty_assertions 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)", |
|
174 | "pretty_assertions 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)", | |
168 | "rand 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)", |
|
175 | "rand 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)", | |
@@ -599,6 +606,7 b' source = "registry+https://github.com/ru' | |||||
599 | "checksum bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" |
|
606 | "checksum bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" | |
600 | "checksum byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a7c3dd8985a7111efc5c80b44e23ecdd8c007de8ade3b96595387e812b957cf5" |
|
607 | "checksum byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a7c3dd8985a7111efc5c80b44e23ecdd8c007de8ade3b96595387e812b957cf5" | |
601 | "checksum c2-chacha 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "214238caa1bf3a496ec3392968969cab8549f96ff30652c9e56885329315f6bb" |
|
608 | "checksum c2-chacha 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "214238caa1bf3a496ec3392968969cab8549f96ff30652c9e56885329315f6bb" | |
|
609 | "checksum cc 1.0.50 (registry+https://github.com/rust-lang/crates.io-index)" = "95e28fa049fda1c330bcf9d723be7663a899c4679724b34c81e9f5a326aab8cd" | |||
602 | "checksum cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)" = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" |
|
610 | "checksum cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)" = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" | |
603 | "checksum cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f" |
|
611 | "checksum cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f" | |
604 | "checksum cpython 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "bfaf3847ab963e40c4f6dd8d6be279bdf74007ae2413786a0dcbb28c52139a95" |
|
612 | "checksum cpython 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "bfaf3847ab963e40c4f6dd8d6be279bdf74007ae2413786a0dcbb28c52139a95" |
@@ -4,6 +4,7 b' version = "0.1.0"' | |||||
4 | authors = ["Georges Racinet <gracinet@anybox.fr>"] |
|
4 | authors = ["Georges Racinet <gracinet@anybox.fr>"] | |
5 | description = "Mercurial pure Rust core library, with no assumption on Python bindings (FFI)" |
|
5 | description = "Mercurial pure Rust core library, with no assumption on Python bindings (FFI)" | |
6 | edition = "2018" |
|
6 | edition = "2018" | |
|
7 | build = "build.rs" | |||
7 |
|
8 | |||
8 | [lib] |
|
9 | [lib] | |
9 | name = "hg" |
|
10 | name = "hg" | |
@@ -12,6 +13,7 b' name = "hg"' | |||||
12 | byteorder = "1.3.1" |
|
13 | byteorder = "1.3.1" | |
13 | hex = "0.4.0" |
|
14 | hex = "0.4.0" | |
14 | lazy_static = "1.3.0" |
|
15 | lazy_static = "1.3.0" | |
|
16 | libc = { version = "0.2.66", optional = true } | |||
15 | memchr = "2.2.0" |
|
17 | memchr = "2.2.0" | |
16 | rand = "0.6.5" |
|
18 | rand = "0.6.5" | |
17 | rand_pcg = "0.1.1" |
|
19 | rand_pcg = "0.1.1" | |
@@ -22,4 +24,11 b' same-file = "1.0.6"' | |||||
22 |
|
24 | |||
23 | [dev-dependencies] |
|
25 | [dev-dependencies] | |
24 | tempfile = "3.1.0" |
|
26 | tempfile = "3.1.0" | |
25 | pretty_assertions = "0.6.1" No newline at end of file |
|
27 | pretty_assertions = "0.6.1" | |
|
28 | ||||
|
29 | [build-dependencies] | |||
|
30 | cc = { version = "1.0.48", optional = true } | |||
|
31 | ||||
|
32 | [features] | |||
|
33 | default = [] | |||
|
34 | with-re2 = ["cc", "libc"] |
@@ -21,6 +21,8 b' mod filepatterns;' | |||||
21 | pub mod matchers; |
|
21 | pub mod matchers; | |
22 | pub mod revlog; |
|
22 | pub mod revlog; | |
23 | pub use revlog::*; |
|
23 | pub use revlog::*; | |
|
24 | #[cfg(feature = "with-re2")] | |||
|
25 | pub mod re2; | |||
24 | pub mod utils; |
|
26 | pub mod utils; | |
25 |
|
27 | |||
26 | use crate::utils::hg_path::{HgPathBuf, HgPathError}; |
|
28 | use crate::utils::hg_path::{HgPathBuf, HgPathError}; |
@@ -10,6 +10,7 b' crate-type = ["cdylib"]' | |||||
10 |
|
10 | |||
11 | [features] |
|
11 | [features] | |
12 | default = ["python27"] |
|
12 | default = ["python27"] | |
|
13 | with-re2 = ["hg-core/with-re2"] | |||
13 |
|
14 | |||
14 | # Features to build an extension module: |
|
15 | # Features to build an extension module: | |
15 | python27 = ["cpython/python27-sys", "cpython/extension-module-2-7"] |
|
16 | python27 = ["cpython/python27-sys", "cpython/extension-module-2-7"] |
@@ -1351,10 +1351,19 b' class RustExtension(Extension):' | |||||
1351 | env['HOME'] = pwd.getpwuid(os.getuid()).pw_dir |
|
1351 | env['HOME'] = pwd.getpwuid(os.getuid()).pw_dir | |
1352 |
|
1352 | |||
1353 | cargocmd = ['cargo', 'rustc', '-vv', '--release'] |
|
1353 | cargocmd = ['cargo', 'rustc', '-vv', '--release'] | |
|
1354 | ||||
|
1355 | feature_flags = [] | |||
|
1356 | ||||
1354 | if sys.version_info[0] == 3 and self.py3_features is not None: |
|
1357 | if sys.version_info[0] == 3 and self.py3_features is not None: | |
1355 | cargocmd.extend( |
|
1358 | feature_flags.append(self.py3_features) | |
1356 | ('--features', self.py3_features, '--no-default-features') |
|
1359 | cargocmd.append('--no-default-features') | |
1357 | ) |
|
1360 | ||
|
1361 | rust_features = env.get("HG_RUST_FEATURES") | |||
|
1362 | if rust_features: | |||
|
1363 | feature_flags.append(rust_features) | |||
|
1364 | ||||
|
1365 | cargocmd.extend(('--features', " ".join(feature_flags))) | |||
|
1366 | ||||
1358 | cargocmd.append('--') |
|
1367 | cargocmd.append('--') | |
1359 | if sys.platform == 'darwin': |
|
1368 | if sys.platform == 'darwin': | |
1360 | cargocmd.extend( |
|
1369 | cargocmd.extend( |
General Comments 0
You need to be logged in to leave comments.
Login now