##// END OF EJS Templates
rust-pathauditor: actually populate the `audited_dirs` cache...
Raphaël Gomès -
r45021:8a237131 default
parent child Browse files
Show More
@@ -1,230 +1,231 b''
1 1 // path_auditor.rs
2 2 //
3 3 // Copyright 2020
4 4 // Raphaël Gomès <rgomes@octobus.net>,
5 5 //
6 6 // This software may be used and distributed according to the terms of the
7 7 // GNU General Public License version 2 or any later version.
8 8
9 9 use crate::utils::{
10 10 files::lower_clean,
11 11 find_slice_in_slice,
12 12 hg_path::{hg_path_to_path_buf, HgPath, HgPathBuf, HgPathError},
13 13 };
14 14 use std::collections::HashSet;
15 15 use std::path::{Path, PathBuf};
16 16
17 17 /// Ensures that a path is valid for use in the repository i.e. does not use
18 18 /// any banned components, does not traverse a symlink, etc.
19 19 #[derive(Debug, Default)]
20 20 pub struct PathAuditor {
21 21 audited: HashSet<HgPathBuf>,
22 22 audited_dirs: HashSet<HgPathBuf>,
23 23 root: PathBuf,
24 24 }
25 25
26 26 impl PathAuditor {
27 27 pub fn new(root: impl AsRef<Path>) -> Self {
28 28 Self {
29 29 root: root.as_ref().to_owned(),
30 30 ..Default::default()
31 31 }
32 32 }
33 33 pub fn audit_path(
34 34 &mut self,
35 35 path: impl AsRef<HgPath>,
36 36 ) -> Result<(), HgPathError> {
37 37 // TODO windows "localpath" normalization
38 38 let path = path.as_ref();
39 39 if path.is_empty() {
40 40 return Ok(());
41 41 }
42 42 // TODO case normalization
43 43 if self.audited.contains(path) {
44 44 return Ok(());
45 45 }
46 46 // AIX ignores "/" at end of path, others raise EISDIR.
47 47 let last_byte = path.as_bytes()[path.len() - 1];
48 48 if last_byte == b'/' || last_byte == b'\\' {
49 49 return Err(HgPathError::EndsWithSlash(path.to_owned()));
50 50 }
51 51 let parts: Vec<_> = path
52 52 .as_bytes()
53 53 .split(|b| std::path::is_separator(*b as char))
54 54 .collect();
55 55
56 56 let first_component = lower_clean(parts[0]);
57 57 let first_component = first_component.as_slice();
58 58 if !path.split_drive().0.is_empty()
59 59 || (first_component == b".hg"
60 60 || first_component == b".hg."
61 61 || first_component == b"")
62 62 || parts.iter().any(|c| c == b"..")
63 63 {
64 64 return Err(HgPathError::InsideDotHg(path.to_owned()));
65 65 }
66 66
67 67 // Windows shortname aliases
68 68 for part in parts.iter() {
69 69 if part.contains(&b'~') {
70 70 let mut split = part.splitn(2, |b| *b == b'~');
71 71 let first =
72 72 split.next().unwrap().to_owned().to_ascii_uppercase();
73 73 let last = split.next().unwrap();
74 74 if last.iter().all(u8::is_ascii_digit)
75 75 && (first == b"HG" || first == b"HG8B6C")
76 76 {
77 77 return Err(HgPathError::ContainsIllegalComponent(
78 78 path.to_owned(),
79 79 ));
80 80 }
81 81 }
82 82 }
83 83 let lower_path = lower_clean(path.as_bytes());
84 84 if find_slice_in_slice(&lower_path, b".hg").is_some() {
85 85 let lower_parts: Vec<_> = path
86 86 .as_bytes()
87 87 .split(|b| std::path::is_separator(*b as char))
88 88 .collect();
89 89 for pattern in [b".hg".to_vec(), b".hg.".to_vec()].iter() {
90 90 if let Some(pos) = lower_parts[1..]
91 91 .iter()
92 92 .position(|part| part == &pattern.as_slice())
93 93 {
94 94 let base = lower_parts[..=pos]
95 95 .iter()
96 96 .fold(HgPathBuf::new(), |acc, p| {
97 97 acc.join(HgPath::new(p))
98 98 });
99 99 return Err(HgPathError::IsInsideNestedRepo {
100 100 path: path.to_owned(),
101 101 nested_repo: base,
102 102 });
103 103 }
104 104 }
105 105 }
106 106
107 107 let parts = &parts[..parts.len().saturating_sub(1)];
108 108
109 109 // We don't want to add "foo/bar/baz" to `audited_dirs` before checking
110 110 // if there's a "foo/.hg" directory. This also means we won't
111 111 // accidentally traverse a symlink into some other filesystem (which
112 112 // is potentially expensive to access).
113 113 for index in 0..parts.len() {
114 114 let prefix = &parts[..index + 1].join(&b'/');
115 115 let prefix = HgPath::new(prefix);
116 116 if self.audited_dirs.contains(prefix) {
117 117 continue;
118 118 }
119 119 self.check_filesystem(&prefix, &path)?;
120 self.audited_dirs.insert(prefix.to_owned());
120 121 }
121 122
122 123 self.audited.insert(path.to_owned());
123 124
124 125 Ok(())
125 126 }
126 127
127 128 pub fn check_filesystem(
128 129 &self,
129 130 prefix: impl AsRef<HgPath>,
130 131 path: impl AsRef<HgPath>,
131 132 ) -> Result<(), HgPathError> {
132 133 let prefix = prefix.as_ref();
133 134 let path = path.as_ref();
134 135 let current_path = self.root.join(
135 136 hg_path_to_path_buf(prefix)
136 137 .map_err(|_| HgPathError::NotFsCompliant(path.to_owned()))?,
137 138 );
138 139 match std::fs::symlink_metadata(&current_path) {
139 140 Err(e) => {
140 141 // EINVAL can be raised as invalid path syntax under win32.
141 142 if e.kind() != std::io::ErrorKind::NotFound
142 143 && e.kind() != std::io::ErrorKind::InvalidInput
143 144 && e.raw_os_error() != Some(20)
144 145 {
145 146 // Rust does not yet have an `ErrorKind` for
146 147 // `NotADirectory` (errno 20)
147 148 // It happens if the dirstate contains `foo/bar` and
148 149 // foo is not a directory
149 150 return Err(HgPathError::NotFsCompliant(path.to_owned()));
150 151 }
151 152 }
152 153 Ok(meta) => {
153 154 if meta.file_type().is_symlink() {
154 155 return Err(HgPathError::TraversesSymbolicLink {
155 156 path: path.to_owned(),
156 157 symlink: prefix.to_owned(),
157 158 });
158 159 }
159 160 if meta.file_type().is_dir()
160 161 && current_path.join(".hg").is_dir()
161 162 {
162 163 return Err(HgPathError::IsInsideNestedRepo {
163 164 path: path.to_owned(),
164 165 nested_repo: prefix.to_owned(),
165 166 });
166 167 }
167 168 }
168 169 };
169 170
170 171 Ok(())
171 172 }
172 173
173 174 pub fn check(&mut self, path: impl AsRef<HgPath>) -> bool {
174 175 self.audit_path(path).is_ok()
175 176 }
176 177 }
177 178
#[cfg(test)]
mod tests {
    use super::*;
    use crate::utils::files::get_path_from_bytes;

    /// Purely textual rejections: these never touch the filesystem, so the
    /// auditor's root is irrelevant.
    #[test]
    fn test_path_auditor() {
        let mut auditor = PathAuditor::new(get_path_from_bytes(b"/tmp"));

        let path = HgPath::new(b".hg/00changelog.i");
        assert_eq!(
            auditor.audit_path(path),
            Err(HgPathError::InsideDotHg(path.to_owned()))
        );
        let path = HgPath::new(b"this/is/nested/.hg/thing.txt");
        assert_eq!(
            auditor.audit_path(path),
            Err(HgPathError::IsInsideNestedRepo {
                path: path.to_owned(),
                nested_repo: HgPathBuf::from_bytes(b"this/is/nested")
            })
        );
    }

    /// A path whose parent directory is a symlink must be rejected.
    ///
    /// The auditor is rooted at the temporary directory itself so the test
    /// does not depend on where the system places temporary files.
    // TODO make portable: symlink creation below is Unix-only.
    #[cfg(unix)]
    #[test]
    fn test_path_auditor_symlink() {
        use std::fs::{create_dir, File};
        use tempfile::tempdir;

        let base_dir = tempdir().unwrap();
        let base_dir_path = base_dir.path();
        let a = base_dir_path.join("a");
        let b = base_dir_path.join("b");
        create_dir(&a).unwrap();
        File::create(a.join("in_a")).unwrap();

        // `b` is a symlink to the directory `a`.
        std::os::unix::fs::symlink(&a, &b).unwrap();

        let mut auditor = PathAuditor::new(base_dir_path);
        let path = HgPath::new(b"b/in_a");
        assert_eq!(
            auditor.audit_path(path),
            Err(HgPathError::TraversesSymbolicLink {
                path: path.to_owned(),
                symlink: HgPathBuf::from_bytes(b"b"),
            })
        );
    }
}
General Comments 0
You need to be logged in to leave comments. Login now