##// END OF EJS Templates
rust-pathauditor: actually populate the `audited_dirs` cache...
Raphaël Gomès -
r45021:8a237131 default
parent child Browse files
Show More
@@ -1,230 +1,231 b''
1 // path_auditor.rs
1 // path_auditor.rs
2 //
2 //
3 // Copyright 2020
3 // Copyright 2020
4 // Raphaël Gomès <rgomes@octobus.net>,
4 // Raphaël Gomès <rgomes@octobus.net>,
5 //
5 //
6 // This software may be used and distributed according to the terms of the
6 // This software may be used and distributed according to the terms of the
7 // GNU General Public License version 2 or any later version.
7 // GNU General Public License version 2 or any later version.
8
8
9 use crate::utils::{
9 use crate::utils::{
10 files::lower_clean,
10 files::lower_clean,
11 find_slice_in_slice,
11 find_slice_in_slice,
12 hg_path::{hg_path_to_path_buf, HgPath, HgPathBuf, HgPathError},
12 hg_path::{hg_path_to_path_buf, HgPath, HgPathBuf, HgPathError},
13 };
13 };
14 use std::collections::HashSet;
14 use std::collections::HashSet;
15 use std::path::{Path, PathBuf};
15 use std::path::{Path, PathBuf};
16
16
17 /// Ensures that a path is valid for use in the repository i.e. does not use
17 /// Ensures that a path is valid for use in the repository i.e. does not use
18 /// any banned components, does not traverse a symlink, etc.
18 /// any banned components, does not traverse a symlink, etc.
19 #[derive(Debug, Default)]
19 #[derive(Debug, Default)]
20 pub struct PathAuditor {
20 pub struct PathAuditor {
21 audited: HashSet<HgPathBuf>,
21 audited: HashSet<HgPathBuf>,
22 audited_dirs: HashSet<HgPathBuf>,
22 audited_dirs: HashSet<HgPathBuf>,
23 root: PathBuf,
23 root: PathBuf,
24 }
24 }
25
25
26 impl PathAuditor {
26 impl PathAuditor {
27 pub fn new(root: impl AsRef<Path>) -> Self {
27 pub fn new(root: impl AsRef<Path>) -> Self {
28 Self {
28 Self {
29 root: root.as_ref().to_owned(),
29 root: root.as_ref().to_owned(),
30 ..Default::default()
30 ..Default::default()
31 }
31 }
32 }
32 }
33 pub fn audit_path(
33 pub fn audit_path(
34 &mut self,
34 &mut self,
35 path: impl AsRef<HgPath>,
35 path: impl AsRef<HgPath>,
36 ) -> Result<(), HgPathError> {
36 ) -> Result<(), HgPathError> {
37 // TODO windows "localpath" normalization
37 // TODO windows "localpath" normalization
38 let path = path.as_ref();
38 let path = path.as_ref();
39 if path.is_empty() {
39 if path.is_empty() {
40 return Ok(());
40 return Ok(());
41 }
41 }
42 // TODO case normalization
42 // TODO case normalization
43 if self.audited.contains(path) {
43 if self.audited.contains(path) {
44 return Ok(());
44 return Ok(());
45 }
45 }
46 // AIX ignores "/" at end of path, others raise EISDIR.
46 // AIX ignores "/" at end of path, others raise EISDIR.
47 let last_byte = path.as_bytes()[path.len() - 1];
47 let last_byte = path.as_bytes()[path.len() - 1];
48 if last_byte == b'/' || last_byte == b'\\' {
48 if last_byte == b'/' || last_byte == b'\\' {
49 return Err(HgPathError::EndsWithSlash(path.to_owned()));
49 return Err(HgPathError::EndsWithSlash(path.to_owned()));
50 }
50 }
51 let parts: Vec<_> = path
51 let parts: Vec<_> = path
52 .as_bytes()
52 .as_bytes()
53 .split(|b| std::path::is_separator(*b as char))
53 .split(|b| std::path::is_separator(*b as char))
54 .collect();
54 .collect();
55
55
56 let first_component = lower_clean(parts[0]);
56 let first_component = lower_clean(parts[0]);
57 let first_component = first_component.as_slice();
57 let first_component = first_component.as_slice();
58 if !path.split_drive().0.is_empty()
58 if !path.split_drive().0.is_empty()
59 || (first_component == b".hg"
59 || (first_component == b".hg"
60 || first_component == b".hg."
60 || first_component == b".hg."
61 || first_component == b"")
61 || first_component == b"")
62 || parts.iter().any(|c| c == b"..")
62 || parts.iter().any(|c| c == b"..")
63 {
63 {
64 return Err(HgPathError::InsideDotHg(path.to_owned()));
64 return Err(HgPathError::InsideDotHg(path.to_owned()));
65 }
65 }
66
66
67 // Windows shortname aliases
67 // Windows shortname aliases
68 for part in parts.iter() {
68 for part in parts.iter() {
69 if part.contains(&b'~') {
69 if part.contains(&b'~') {
70 let mut split = part.splitn(2, |b| *b == b'~');
70 let mut split = part.splitn(2, |b| *b == b'~');
71 let first =
71 let first =
72 split.next().unwrap().to_owned().to_ascii_uppercase();
72 split.next().unwrap().to_owned().to_ascii_uppercase();
73 let last = split.next().unwrap();
73 let last = split.next().unwrap();
74 if last.iter().all(u8::is_ascii_digit)
74 if last.iter().all(u8::is_ascii_digit)
75 && (first == b"HG" || first == b"HG8B6C")
75 && (first == b"HG" || first == b"HG8B6C")
76 {
76 {
77 return Err(HgPathError::ContainsIllegalComponent(
77 return Err(HgPathError::ContainsIllegalComponent(
78 path.to_owned(),
78 path.to_owned(),
79 ));
79 ));
80 }
80 }
81 }
81 }
82 }
82 }
83 let lower_path = lower_clean(path.as_bytes());
83 let lower_path = lower_clean(path.as_bytes());
84 if find_slice_in_slice(&lower_path, b".hg").is_some() {
84 if find_slice_in_slice(&lower_path, b".hg").is_some() {
85 let lower_parts: Vec<_> = path
85 let lower_parts: Vec<_> = path
86 .as_bytes()
86 .as_bytes()
87 .split(|b| std::path::is_separator(*b as char))
87 .split(|b| std::path::is_separator(*b as char))
88 .collect();
88 .collect();
89 for pattern in [b".hg".to_vec(), b".hg.".to_vec()].iter() {
89 for pattern in [b".hg".to_vec(), b".hg.".to_vec()].iter() {
90 if let Some(pos) = lower_parts[1..]
90 if let Some(pos) = lower_parts[1..]
91 .iter()
91 .iter()
92 .position(|part| part == &pattern.as_slice())
92 .position(|part| part == &pattern.as_slice())
93 {
93 {
94 let base = lower_parts[..=pos]
94 let base = lower_parts[..=pos]
95 .iter()
95 .iter()
96 .fold(HgPathBuf::new(), |acc, p| {
96 .fold(HgPathBuf::new(), |acc, p| {
97 acc.join(HgPath::new(p))
97 acc.join(HgPath::new(p))
98 });
98 });
99 return Err(HgPathError::IsInsideNestedRepo {
99 return Err(HgPathError::IsInsideNestedRepo {
100 path: path.to_owned(),
100 path: path.to_owned(),
101 nested_repo: base,
101 nested_repo: base,
102 });
102 });
103 }
103 }
104 }
104 }
105 }
105 }
106
106
107 let parts = &parts[..parts.len().saturating_sub(1)];
107 let parts = &parts[..parts.len().saturating_sub(1)];
108
108
109 // We don't want to add "foo/bar/baz" to `audited_dirs` before checking
109 // We don't want to add "foo/bar/baz" to `audited_dirs` before checking
110 // if there's a "foo/.hg" directory. This also means we won't
110 // if there's a "foo/.hg" directory. This also means we won't
111 // accidentally traverse a symlink into some other filesystem (which
111 // accidentally traverse a symlink into some other filesystem (which
112 // is potentially expensive to access).
112 // is potentially expensive to access).
113 for index in 0..parts.len() {
113 for index in 0..parts.len() {
114 let prefix = &parts[..index + 1].join(&b'/');
114 let prefix = &parts[..index + 1].join(&b'/');
115 let prefix = HgPath::new(prefix);
115 let prefix = HgPath::new(prefix);
116 if self.audited_dirs.contains(prefix) {
116 if self.audited_dirs.contains(prefix) {
117 continue;
117 continue;
118 }
118 }
119 self.check_filesystem(&prefix, &path)?;
119 self.check_filesystem(&prefix, &path)?;
120 self.audited_dirs.insert(prefix.to_owned());
120 }
121 }
121
122
122 self.audited.insert(path.to_owned());
123 self.audited.insert(path.to_owned());
123
124
124 Ok(())
125 Ok(())
125 }
126 }
126
127
127 pub fn check_filesystem(
128 pub fn check_filesystem(
128 &self,
129 &self,
129 prefix: impl AsRef<HgPath>,
130 prefix: impl AsRef<HgPath>,
130 path: impl AsRef<HgPath>,
131 path: impl AsRef<HgPath>,
131 ) -> Result<(), HgPathError> {
132 ) -> Result<(), HgPathError> {
132 let prefix = prefix.as_ref();
133 let prefix = prefix.as_ref();
133 let path = path.as_ref();
134 let path = path.as_ref();
134 let current_path = self.root.join(
135 let current_path = self.root.join(
135 hg_path_to_path_buf(prefix)
136 hg_path_to_path_buf(prefix)
136 .map_err(|_| HgPathError::NotFsCompliant(path.to_owned()))?,
137 .map_err(|_| HgPathError::NotFsCompliant(path.to_owned()))?,
137 );
138 );
138 match std::fs::symlink_metadata(&current_path) {
139 match std::fs::symlink_metadata(&current_path) {
139 Err(e) => {
140 Err(e) => {
140 // EINVAL can be raised as invalid path syntax under win32.
141 // EINVAL can be raised as invalid path syntax under win32.
141 if e.kind() != std::io::ErrorKind::NotFound
142 if e.kind() != std::io::ErrorKind::NotFound
142 && e.kind() != std::io::ErrorKind::InvalidInput
143 && e.kind() != std::io::ErrorKind::InvalidInput
143 && e.raw_os_error() != Some(20)
144 && e.raw_os_error() != Some(20)
144 {
145 {
145 // Rust does not yet have an `ErrorKind` for
146 // Rust does not yet have an `ErrorKind` for
146 // `NotADirectory` (errno 20)
147 // `NotADirectory` (errno 20)
147 // It happens if the dirstate contains `foo/bar` and
148 // It happens if the dirstate contains `foo/bar` and
148 // foo is not a directory
149 // foo is not a directory
149 return Err(HgPathError::NotFsCompliant(path.to_owned()));
150 return Err(HgPathError::NotFsCompliant(path.to_owned()));
150 }
151 }
151 }
152 }
152 Ok(meta) => {
153 Ok(meta) => {
153 if meta.file_type().is_symlink() {
154 if meta.file_type().is_symlink() {
154 return Err(HgPathError::TraversesSymbolicLink {
155 return Err(HgPathError::TraversesSymbolicLink {
155 path: path.to_owned(),
156 path: path.to_owned(),
156 symlink: prefix.to_owned(),
157 symlink: prefix.to_owned(),
157 });
158 });
158 }
159 }
159 if meta.file_type().is_dir()
160 if meta.file_type().is_dir()
160 && current_path.join(".hg").is_dir()
161 && current_path.join(".hg").is_dir()
161 {
162 {
162 return Err(HgPathError::IsInsideNestedRepo {
163 return Err(HgPathError::IsInsideNestedRepo {
163 path: path.to_owned(),
164 path: path.to_owned(),
164 nested_repo: prefix.to_owned(),
165 nested_repo: prefix.to_owned(),
165 });
166 });
166 }
167 }
167 }
168 }
168 };
169 };
169
170
170 Ok(())
171 Ok(())
171 }
172 }
172
173
173 pub fn check(&mut self, path: impl AsRef<HgPath>) -> bool {
174 pub fn check(&mut self, path: impl AsRef<HgPath>) -> bool {
174 self.audit_path(path).is_ok()
175 self.audit_path(path).is_ok()
175 }
176 }
176 }
177 }
177
178
#[cfg(test)]
mod tests {
    use super::*;

    /// Exercises the purely syntactic rejections first, then a real symlink
    /// traversal inside a temporary directory.
    #[test]
    fn test_path_auditor() {
        use std::fs::{create_dir, File};
        use tempfile::tempdir;

        // Root the auditor at the temporary directory itself so the test
        // does not depend on where the platform places temporary files.
        let base_dir = tempdir().unwrap();
        let base_dir_path = base_dir.path();
        let mut auditor = PathAuditor::new(base_dir_path);

        let path = HgPath::new(b".hg/00changelog.i");
        assert_eq!(
            auditor.audit_path(path),
            Err(HgPathError::InsideDotHg(path.to_owned()))
        );
        let path = HgPath::new(b"this/is/nested/.hg/thing.txt");
        assert_eq!(
            auditor.audit_path(path),
            Err(HgPathError::IsInsideNestedRepo {
                path: path.to_owned(),
                nested_repo: HgPathBuf::from_bytes(b"this/is/nested")
            })
        );

        // Layout: `a/in_a` is a regular file, `b` is a symlink to `a`.
        let a = base_dir_path.join("a");
        let b = base_dir_path.join("b");
        create_dir(&a).unwrap();
        File::create(a.join("in_a")).unwrap();

        // TODO make portable
        std::os::unix::fs::symlink(&a, &b).unwrap();

        // Reaching `in_a` through `b` must be reported as a symlink
        // traversal, with `b` identified as the offending component.
        let path = HgPath::new(b"b/in_a");
        assert_eq!(
            auditor.audit_path(path),
            Err(HgPathError::TraversesSymbolicLink {
                path: path.to_owned(),
                symlink: HgPathBuf::from_bytes(b"b"),
            })
        );
    }
}
General Comments 0
You need to be logged in to leave comments. Login now