Show More
@@ -1,634 +1,639 b'' | |||||
1 | use sha1::{Digest, Sha1}; |
|
1 | use sha1::{Digest, Sha1}; | |
2 |
|
2 | |||
/// States of the byte-at-a-time state machine that `_encode` runs over a
/// path.
///
/// Most states track progress through a short sequence that needs special
/// treatment at the start of a path component ("aux", "con", "comN",
/// "lptN", "nul", "prn") or after a '.' (".hg", ".d", ".i").
// The lints are silenced because the type and variant names deliberately
// deviate from Rust naming conventions.
// NOTE(review): presumably they mirror an existing C implementation —
// confirm before renaming anything.
#[derive(PartialEq, Debug)]
#[allow(non_camel_case_types)]
#[allow(clippy::upper_case_acronyms)]
enum path_state {
    /// First byte of a path component.
    START,
    /// Seen "a" (on the way to "aux").
    A,
    /// Seen "au".
    AU,
    /// Third byte of a 3-byte sequence, e.g. "aux", "nul".
    THIRD,
    /// Seen "c" (on the way to "con" or "comN").
    C,
    /// Seen "co".
    CO,
    /// Seen "com" or "lpt".
    COMLPT,
    /// Seen "com" or "lpt" followed by a digit in `1..=9`.
    COMLPTn,
    /// Seen "l" (on the way to "lptN").
    L,
    /// Seen "lp".
    LP,
    /// Seen "n" (on the way to "nul").
    N,
    /// Seen "nu".
    NU,
    /// Seen "p" (on the way to "prn").
    P,
    /// Seen "pr".
    PR,
    /// Leading '.' of a path component.
    LDOT,
    /// '.' in a non-leading position.
    DOT,
    /// Seen ".h".
    H,
    /// Seen ".hg", ".d", or ".i".
    HGDI,
    /// Seen a ' ' in a non-leading position.
    SPACE,
    /// Byte of a path component after the first.
    DEFAULT,
}
28 |
|
28 | |||
/// State machine for dir-encoding (see `encode_dir`).
// The lints are silenced because the type and variant names deliberately
// deviate from Rust naming conventions.
#[allow(non_camel_case_types)]
#[allow(clippy::upper_case_acronyms)]
enum dir_state {
    /// Just seen a '.'.
    DDOT,
    /// Seen ".h".
    DH,
    /// Seen ".hg", ".d", or ".i".
    DHGDI,
    /// Any other position.
    DDEFAULT,
}
38 |
|
38 | |||
/// Destination for encoded bytes.
///
/// The encoders in this file are generic over this trait, so the same code
/// can fill a buffer or merely count how many bytes it would produce.
trait Sink {
    /// Appends a single byte.
    fn write_byte(&mut self, c: u8);
    /// Appends every byte of `c`, in order.
    fn write_bytes(&mut self, c: &[u8]);
}
43 |
|
43 | |||
/// Returns `true` when byte `c` is a member of `bitset`.
///
/// `bitset` is a 256-bit set stored as eight `u32` words: the top three bits
/// of `c` select the word and the low five bits select the bit inside it.
fn inset(bitset: &[u32; 8], c: u8) -> bool {
    let word = bitset[usize::from(c >> 5)];
    (word & (1u32 << (c & 31))) != 0
}
47 |
|
47 | |||
/// Capacity, in bytes, of the fixed-size scratch buffers used by
/// `hash_encode`.
const MAXENCODE: usize = 4096 * 4;
49 |
|
49 | |||
/// Fixed-capacity byte buffer of `N` bytes that remembers how many bytes
/// have been written so far.
struct DestArr<const N: usize> {
    /// Backing storage; only `buf[..len]` holds written data.
    buf: [u8; N],
    /// Number of bytes written so far.
    pub len: usize,
}

impl<const N: usize> DestArr<N> {
    /// Creates an empty, zero-filled buffer.
    pub fn create() -> Self {
        DestArr {
            buf: [0; N],
            len: 0,
        }
    }

    /// Returns the bytes written so far.
    pub fn contents(&self) -> &[u8] {
        &self.buf[..self.len]
    }
}
67 |
|
67 | |||
68 | impl<const N: usize> Sink for DestArr<N> { |
|
68 | impl<const N: usize> Sink for DestArr<N> { | |
69 | fn write_byte(&mut self, c: u8) { |
|
69 | fn write_byte(&mut self, c: u8) { | |
70 | self.buf[self.len] = c; |
|
70 | self.buf[self.len] = c; | |
71 | self.len += 1; |
|
71 | self.len += 1; | |
72 | } |
|
72 | } | |
73 |
|
73 | |||
74 | fn write_bytes(&mut self, src: &[u8]) { |
|
74 | fn write_bytes(&mut self, src: &[u8]) { | |
75 | self.buf[self.len..self.len + src.len()].copy_from_slice(src); |
|
75 | self.buf[self.len..self.len + src.len()].copy_from_slice(src); | |
76 | self.len += src.len(); |
|
76 | self.len += src.len(); | |
77 | } |
|
77 | } | |
78 | } |
|
78 | } | |
79 |
|
79 | |||
/// Sink that stores nothing and only counts how many bytes were written.
struct MeasureDest {
    /// Number of bytes "written" so far.
    pub len: usize,
}
83 |
|
83 | |||
84 | impl Sink for Vec<u8> { |
|
84 | impl Sink for Vec<u8> { | |
85 | fn write_byte(&mut self, c: u8) { |
|
85 | fn write_byte(&mut self, c: u8) { | |
86 | self.push(c) |
|
86 | self.push(c) | |
87 | } |
|
87 | } | |
88 |
|
88 | |||
89 | fn write_bytes(&mut self, src: &[u8]) { |
|
89 | fn write_bytes(&mut self, src: &[u8]) { | |
90 | self.extend_from_slice(src) |
|
90 | self.extend_from_slice(src) | |
91 | } |
|
91 | } | |
92 | } |
|
92 | } | |
93 |
|
93 | |||
94 | impl MeasureDest { |
|
94 | impl MeasureDest { | |
95 | fn create() -> Self { |
|
95 | fn create() -> Self { | |
96 | Self { len: 0 } |
|
96 | Self { len: 0 } | |
97 | } |
|
97 | } | |
98 | } |
|
98 | } | |
99 |
|
99 | |||
100 | impl Sink for MeasureDest { |
|
100 | impl Sink for MeasureDest { | |
101 | fn write_byte(&mut self, _c: u8) { |
|
101 | fn write_byte(&mut self, _c: u8) { | |
102 | self.len += 1; |
|
102 | self.len += 1; | |
103 | } |
|
103 | } | |
104 |
|
104 | |||
105 | fn write_bytes(&mut self, src: &[u8]) { |
|
105 | fn write_bytes(&mut self, src: &[u8]) { | |
106 | self.len += src.len(); |
|
106 | self.len += src.len(); | |
107 | } |
|
107 | } | |
108 | } |
|
108 | } | |
109 |
|
109 | |||
110 | fn hexencode(dest: &mut impl Sink, c: u8) { |
|
110 | fn hexencode(dest: &mut impl Sink, c: u8) { | |
111 | let hexdigit = b"0123456789abcdef"; |
|
111 | let hexdigit = b"0123456789abcdef"; | |
112 | dest.write_byte(hexdigit[(c as usize) >> 4]); |
|
112 | dest.write_byte(hexdigit[(c as usize) >> 4]); | |
113 | dest.write_byte(hexdigit[(c as usize) & 15]); |
|
113 | dest.write_byte(hexdigit[(c as usize) & 15]); | |
114 | } |
|
114 | } | |
115 |
|
115 | |||
116 | /* 3-byte escape: tilde followed by two hex digits */ |
|
116 | /* 3-byte escape: tilde followed by two hex digits */ | |
117 | fn escape3(dest: &mut impl Sink, c: u8) { |
|
117 | fn escape3(dest: &mut impl Sink, c: u8) { | |
118 | dest.write_byte(b'~'); |
|
118 | dest.write_byte(b'~'); | |
119 | hexencode(dest, c); |
|
119 | hexencode(dest, c); | |
120 | } |
|
120 | } | |
121 |
|
121 | |||
122 | fn encode_dir(dest: &mut impl Sink, src: &[u8]) { |
|
122 | fn encode_dir(dest: &mut impl Sink, src: &[u8]) { | |
123 | let mut state = dir_state::DDEFAULT; |
|
123 | let mut state = dir_state::DDEFAULT; | |
124 | let mut i = 0; |
|
124 | let mut i = 0; | |
125 |
|
125 | |||
126 | while i < src.len() { |
|
126 | while i < src.len() { | |
127 | match state { |
|
127 | match state { | |
128 | dir_state::DDOT => match src[i] { |
|
128 | dir_state::DDOT => match src[i] { | |
129 | b'd' | b'i' => { |
|
129 | b'd' | b'i' => { | |
130 | state = dir_state::DHGDI; |
|
130 | state = dir_state::DHGDI; | |
131 | dest.write_byte(src[i]); |
|
131 | dest.write_byte(src[i]); | |
132 | i += 1; |
|
132 | i += 1; | |
133 | } |
|
133 | } | |
134 | b'h' => { |
|
134 | b'h' => { | |
135 | state = dir_state::DH; |
|
135 | state = dir_state::DH; | |
136 | dest.write_byte(src[i]); |
|
136 | dest.write_byte(src[i]); | |
137 | i += 1; |
|
137 | i += 1; | |
138 | } |
|
138 | } | |
139 | _ => { |
|
139 | _ => { | |
140 | state = dir_state::DDEFAULT; |
|
140 | state = dir_state::DDEFAULT; | |
141 | } |
|
141 | } | |
142 | }, |
|
142 | }, | |
143 | dir_state::DH => { |
|
143 | dir_state::DH => { | |
144 | if src[i] == b'g' { |
|
144 | if src[i] == b'g' { | |
145 | state = dir_state::DHGDI; |
|
145 | state = dir_state::DHGDI; | |
146 | dest.write_byte(src[i]); |
|
146 | dest.write_byte(src[i]); | |
147 | i += 1; |
|
147 | i += 1; | |
148 | } else { |
|
148 | } else { | |
149 | state = dir_state::DDEFAULT; |
|
149 | state = dir_state::DDEFAULT; | |
150 | } |
|
150 | } | |
151 | } |
|
151 | } | |
152 | dir_state::DHGDI => { |
|
152 | dir_state::DHGDI => { | |
153 | if src[i] == b'/' { |
|
153 | if src[i] == b'/' { | |
154 | dest.write_bytes(b".hg"); |
|
154 | dest.write_bytes(b".hg"); | |
155 | dest.write_byte(src[i]); |
|
155 | dest.write_byte(src[i]); | |
156 | i += 1; |
|
156 | i += 1; | |
157 | } |
|
157 | } | |
158 | state = dir_state::DDEFAULT; |
|
158 | state = dir_state::DDEFAULT; | |
159 | } |
|
159 | } | |
160 | dir_state::DDEFAULT => { |
|
160 | dir_state::DDEFAULT => { | |
161 | if src[i] == b'.' { |
|
161 | if src[i] == b'.' { | |
162 | state = dir_state::DDOT |
|
162 | state = dir_state::DDOT | |
163 | } |
|
163 | } | |
164 | dest.write_byte(src[i]); |
|
164 | dest.write_byte(src[i]); | |
165 | i += 1; |
|
165 | i += 1; | |
166 | } |
|
166 | } | |
167 | } |
|
167 | } | |
168 | } |
|
168 | } | |
169 | } |
|
169 | } | |
170 |
|
170 | |||
171 | fn _encode( |
|
171 | fn _encode( | |
172 | twobytes: &[u32; 8], |
|
172 | twobytes: &[u32; 8], | |
173 | onebyte: &[u32; 8], |
|
173 | onebyte: &[u32; 8], | |
174 | dest: &mut impl Sink, |
|
174 | dest: &mut impl Sink, | |
175 | src: &[u8], |
|
175 | src: &[u8], | |
176 | encodedir: bool, |
|
176 | encodedir: bool, | |
177 | ) { |
|
177 | ) { | |
178 | let mut state = path_state::START; |
|
178 | let mut state = path_state::START; | |
179 | let mut i = 0; |
|
179 | let mut i = 0; | |
180 | let len = src.len(); |
|
180 | let len = src.len(); | |
181 |
|
181 | |||
182 | while i < len { |
|
182 | while i < len { | |
183 | match state { |
|
183 | match state { | |
184 | path_state::START => match src[i] { |
|
184 | path_state::START => match src[i] { | |
185 | b'/' => { |
|
185 | b'/' => { | |
186 | dest.write_byte(src[i]); |
|
186 | dest.write_byte(src[i]); | |
187 | i += 1; |
|
187 | i += 1; | |
188 | } |
|
188 | } | |
189 | b'.' => { |
|
189 | b'.' => { | |
190 | state = path_state::LDOT; |
|
190 | state = path_state::LDOT; | |
191 | escape3(dest, src[i]); |
|
191 | escape3(dest, src[i]); | |
192 | i += 1; |
|
192 | i += 1; | |
193 | } |
|
193 | } | |
194 | b' ' => { |
|
194 | b' ' => { | |
195 | state = path_state::DEFAULT; |
|
195 | state = path_state::DEFAULT; | |
196 | escape3(dest, src[i]); |
|
196 | escape3(dest, src[i]); | |
197 | i += 1; |
|
197 | i += 1; | |
198 | } |
|
198 | } | |
199 | b'a' => { |
|
199 | b'a' => { | |
200 | state = path_state::A; |
|
200 | state = path_state::A; | |
201 | dest.write_byte(src[i]); |
|
201 | dest.write_byte(src[i]); | |
202 | i += 1; |
|
202 | i += 1; | |
203 | } |
|
203 | } | |
204 | b'c' => { |
|
204 | b'c' => { | |
205 | state = path_state::C; |
|
205 | state = path_state::C; | |
206 | dest.write_byte(src[i]); |
|
206 | dest.write_byte(src[i]); | |
207 | i += 1; |
|
207 | i += 1; | |
208 | } |
|
208 | } | |
209 | b'l' => { |
|
209 | b'l' => { | |
210 | state = path_state::L; |
|
210 | state = path_state::L; | |
211 | dest.write_byte(src[i]); |
|
211 | dest.write_byte(src[i]); | |
212 | i += 1; |
|
212 | i += 1; | |
213 | } |
|
213 | } | |
214 | b'n' => { |
|
214 | b'n' => { | |
215 | state = path_state::N; |
|
215 | state = path_state::N; | |
216 | dest.write_byte(src[i]); |
|
216 | dest.write_byte(src[i]); | |
217 | i += 1; |
|
217 | i += 1; | |
218 | } |
|
218 | } | |
219 | b'p' => { |
|
219 | b'p' => { | |
220 | state = path_state::P; |
|
220 | state = path_state::P; | |
221 | dest.write_byte(src[i]); |
|
221 | dest.write_byte(src[i]); | |
222 | i += 1; |
|
222 | i += 1; | |
223 | } |
|
223 | } | |
224 | _ => { |
|
224 | _ => { | |
225 | state = path_state::DEFAULT; |
|
225 | state = path_state::DEFAULT; | |
226 | } |
|
226 | } | |
227 | }, |
|
227 | }, | |
228 | path_state::A => { |
|
228 | path_state::A => { | |
229 | if src[i] == b'u' { |
|
229 | if src[i] == b'u' { | |
230 | state = path_state::AU; |
|
230 | state = path_state::AU; | |
231 | dest.write_byte(src[i]); |
|
231 | dest.write_byte(src[i]); | |
232 | i += 1; |
|
232 | i += 1; | |
233 | } else { |
|
233 | } else { | |
234 | state = path_state::DEFAULT; |
|
234 | state = path_state::DEFAULT; | |
235 | } |
|
235 | } | |
236 | } |
|
236 | } | |
237 | path_state::AU => { |
|
237 | path_state::AU => { | |
238 | if src[i] == b'x' { |
|
238 | if src[i] == b'x' { | |
239 | state = path_state::THIRD; |
|
239 | state = path_state::THIRD; | |
240 | i += 1; |
|
240 | i += 1; | |
241 | } else { |
|
241 | } else { | |
242 | state = path_state::DEFAULT; |
|
242 | state = path_state::DEFAULT; | |
243 | } |
|
243 | } | |
244 | } |
|
244 | } | |
245 | path_state::THIRD => { |
|
245 | path_state::THIRD => { | |
246 | state = path_state::DEFAULT; |
|
246 | state = path_state::DEFAULT; | |
247 | match src[i] { |
|
247 | match src[i] { | |
248 | b'.' | b'/' | b'\0' => escape3(dest, src[i - 1]), |
|
248 | b'.' | b'/' | b'\0' => escape3(dest, src[i - 1]), | |
249 | _ => i -= 1, |
|
249 | _ => i -= 1, | |
250 | } |
|
250 | } | |
251 | } |
|
251 | } | |
252 | path_state::C => { |
|
252 | path_state::C => { | |
253 | if src[i] == b'o' { |
|
253 | if src[i] == b'o' { | |
254 | state = path_state::CO; |
|
254 | state = path_state::CO; | |
255 | dest.write_byte(src[i]); |
|
255 | dest.write_byte(src[i]); | |
256 | i += 1; |
|
256 | i += 1; | |
257 | } else { |
|
257 | } else { | |
258 | state = path_state::DEFAULT; |
|
258 | state = path_state::DEFAULT; | |
259 | } |
|
259 | } | |
260 | } |
|
260 | } | |
261 | path_state::CO => { |
|
261 | path_state::CO => { | |
262 | if src[i] == b'm' { |
|
262 | if src[i] == b'm' { | |
263 | state = path_state::COMLPT; |
|
263 | state = path_state::COMLPT; | |
264 | i += 1; |
|
264 | i += 1; | |
265 | } else if src[i] == b'n' { |
|
265 | } else if src[i] == b'n' { | |
266 | state = path_state::THIRD; |
|
266 | state = path_state::THIRD; | |
267 | i += 1; |
|
267 | i += 1; | |
268 | } else { |
|
268 | } else { | |
269 | state = path_state::DEFAULT; |
|
269 | state = path_state::DEFAULT; | |
270 | } |
|
270 | } | |
271 | } |
|
271 | } | |
272 | path_state::COMLPT => { |
|
272 | path_state::COMLPT => { | |
273 | if src[i] >= b'1' && src[i] <= b'9' { |
|
273 | if src[i] >= b'1' && src[i] <= b'9' { | |
274 | state = path_state::COMLPTn; |
|
274 | state = path_state::COMLPTn; | |
275 | i += 1; |
|
275 | i += 1; | |
276 | } else { |
|
276 | } else { | |
277 | state = path_state::DEFAULT; |
|
277 | state = path_state::DEFAULT; | |
278 | dest.write_byte(src[i - 1]); |
|
278 | dest.write_byte(src[i - 1]); | |
279 | } |
|
279 | } | |
280 | } |
|
280 | } | |
281 | path_state::COMLPTn => { |
|
281 | path_state::COMLPTn => { | |
282 | state = path_state::DEFAULT; |
|
282 | state = path_state::DEFAULT; | |
283 | match src[i] { |
|
283 | match src[i] { | |
284 | b'.' | b'/' | b'\0' => { |
|
284 | b'.' | b'/' | b'\0' => { | |
285 | escape3(dest, src[i - 2]); |
|
285 | escape3(dest, src[i - 2]); | |
286 | dest.write_byte(src[i - 1]); |
|
286 | dest.write_byte(src[i - 1]); | |
287 | } |
|
287 | } | |
288 | _ => { |
|
288 | _ => { | |
289 | dest.write_bytes(&src[i - 2..i]); |
|
289 | dest.write_bytes(&src[i - 2..i]); | |
290 | } |
|
290 | } | |
291 | } |
|
291 | } | |
292 | } |
|
292 | } | |
293 | path_state::L => { |
|
293 | path_state::L => { | |
294 | if src[i] == b'p' { |
|
294 | if src[i] == b'p' { | |
295 | state = path_state::LP; |
|
295 | state = path_state::LP; | |
296 | dest.write_byte(src[i]); |
|
296 | dest.write_byte(src[i]); | |
297 | i += 1; |
|
297 | i += 1; | |
298 | } else { |
|
298 | } else { | |
299 | state = path_state::DEFAULT; |
|
299 | state = path_state::DEFAULT; | |
300 | } |
|
300 | } | |
301 | } |
|
301 | } | |
302 | path_state::LP => { |
|
302 | path_state::LP => { | |
303 | if src[i] == b't' { |
|
303 | if src[i] == b't' { | |
304 | state = path_state::COMLPT; |
|
304 | state = path_state::COMLPT; | |
305 | i += 1; |
|
305 | i += 1; | |
306 | } else { |
|
306 | } else { | |
307 | state = path_state::DEFAULT; |
|
307 | state = path_state::DEFAULT; | |
308 | } |
|
308 | } | |
309 | } |
|
309 | } | |
310 | path_state::N => { |
|
310 | path_state::N => { | |
311 | if src[i] == b'u' { |
|
311 | if src[i] == b'u' { | |
312 | state = path_state::NU; |
|
312 | state = path_state::NU; | |
313 | dest.write_byte(src[i]); |
|
313 | dest.write_byte(src[i]); | |
314 | i += 1; |
|
314 | i += 1; | |
315 | } else { |
|
315 | } else { | |
316 | state = path_state::DEFAULT; |
|
316 | state = path_state::DEFAULT; | |
317 | } |
|
317 | } | |
318 | } |
|
318 | } | |
319 | path_state::NU => { |
|
319 | path_state::NU => { | |
320 | if src[i] == b'l' { |
|
320 | if src[i] == b'l' { | |
321 | state = path_state::THIRD; |
|
321 | state = path_state::THIRD; | |
322 | i += 1; |
|
322 | i += 1; | |
323 | } else { |
|
323 | } else { | |
324 | state = path_state::DEFAULT; |
|
324 | state = path_state::DEFAULT; | |
325 | } |
|
325 | } | |
326 | } |
|
326 | } | |
327 | path_state::P => { |
|
327 | path_state::P => { | |
328 | if src[i] == b'r' { |
|
328 | if src[i] == b'r' { | |
329 | state = path_state::PR; |
|
329 | state = path_state::PR; | |
330 | dest.write_byte(src[i]); |
|
330 | dest.write_byte(src[i]); | |
331 | i += 1; |
|
331 | i += 1; | |
332 | } else { |
|
332 | } else { | |
333 | state = path_state::DEFAULT; |
|
333 | state = path_state::DEFAULT; | |
334 | } |
|
334 | } | |
335 | } |
|
335 | } | |
336 | path_state::PR => { |
|
336 | path_state::PR => { | |
337 | if src[i] == b'n' { |
|
337 | if src[i] == b'n' { | |
338 | state = path_state::THIRD; |
|
338 | state = path_state::THIRD; | |
339 | i += 1; |
|
339 | i += 1; | |
340 | } else { |
|
340 | } else { | |
341 | state = path_state::DEFAULT; |
|
341 | state = path_state::DEFAULT; | |
342 | } |
|
342 | } | |
343 | } |
|
343 | } | |
344 | path_state::LDOT => match src[i] { |
|
344 | path_state::LDOT => match src[i] { | |
345 | b'd' | b'i' => { |
|
345 | b'd' | b'i' => { | |
346 | state = path_state::HGDI; |
|
346 | state = path_state::HGDI; | |
347 | dest.write_byte(src[i]); |
|
347 | dest.write_byte(src[i]); | |
348 | i += 1; |
|
348 | i += 1; | |
349 | } |
|
349 | } | |
350 | b'h' => { |
|
350 | b'h' => { | |
351 | state = path_state::H; |
|
351 | state = path_state::H; | |
352 | dest.write_byte(src[i]); |
|
352 | dest.write_byte(src[i]); | |
353 | i += 1; |
|
353 | i += 1; | |
354 | } |
|
354 | } | |
355 | _ => { |
|
355 | _ => { | |
356 | state = path_state::DEFAULT; |
|
356 | state = path_state::DEFAULT; | |
357 | } |
|
357 | } | |
358 | }, |
|
358 | }, | |
359 | path_state::DOT => match src[i] { |
|
359 | path_state::DOT => match src[i] { | |
360 | b'/' | b'\0' => { |
|
360 | b'/' | b'\0' => { | |
361 | state = path_state::START; |
|
361 | state = path_state::START; | |
362 | dest.write_bytes(b"~2e"); |
|
362 | dest.write_bytes(b"~2e"); | |
363 | dest.write_byte(src[i]); |
|
363 | dest.write_byte(src[i]); | |
364 | i += 1; |
|
364 | i += 1; | |
365 | } |
|
365 | } | |
366 | b'd' | b'i' => { |
|
366 | b'd' | b'i' => { | |
367 | state = path_state::HGDI; |
|
367 | state = path_state::HGDI; | |
368 | dest.write_byte(b'.'); |
|
368 | dest.write_byte(b'.'); | |
369 | dest.write_byte(src[i]); |
|
369 | dest.write_byte(src[i]); | |
370 | i += 1; |
|
370 | i += 1; | |
371 | } |
|
371 | } | |
372 | b'h' => { |
|
372 | b'h' => { | |
373 | state = path_state::H; |
|
373 | state = path_state::H; | |
374 | dest.write_bytes(b".h"); |
|
374 | dest.write_bytes(b".h"); | |
375 | i += 1; |
|
375 | i += 1; | |
376 | } |
|
376 | } | |
377 | _ => { |
|
377 | _ => { | |
378 | state = path_state::DEFAULT; |
|
378 | state = path_state::DEFAULT; | |
379 | dest.write_byte(b'.'); |
|
379 | dest.write_byte(b'.'); | |
380 | } |
|
380 | } | |
381 | }, |
|
381 | }, | |
382 | path_state::H => { |
|
382 | path_state::H => { | |
383 | if src[i] == b'g' { |
|
383 | if src[i] == b'g' { | |
384 | state = path_state::HGDI; |
|
384 | state = path_state::HGDI; | |
385 | dest.write_byte(src[i]); |
|
385 | dest.write_byte(src[i]); | |
386 | i += 1; |
|
386 | i += 1; | |
387 | } else { |
|
387 | } else { | |
388 | state = path_state::DEFAULT; |
|
388 | state = path_state::DEFAULT; | |
389 | } |
|
389 | } | |
390 | } |
|
390 | } | |
391 | path_state::HGDI => { |
|
391 | path_state::HGDI => { | |
392 | if src[i] == b'/' { |
|
392 | if src[i] == b'/' { | |
393 | state = path_state::START; |
|
393 | state = path_state::START; | |
394 | if encodedir { |
|
394 | if encodedir { | |
395 | dest.write_bytes(b".hg"); |
|
395 | dest.write_bytes(b".hg"); | |
396 | } |
|
396 | } | |
397 | dest.write_byte(src[i]); |
|
397 | dest.write_byte(src[i]); | |
398 | i += 1 |
|
398 | i += 1 | |
399 | } else { |
|
399 | } else { | |
400 | state = path_state::DEFAULT; |
|
400 | state = path_state::DEFAULT; | |
401 | } |
|
401 | } | |
402 | } |
|
402 | } | |
403 | path_state::SPACE => match src[i] { |
|
403 | path_state::SPACE => match src[i] { | |
404 | b'/' | b'\0' => { |
|
404 | b'/' | b'\0' => { | |
405 | state = path_state::START; |
|
405 | state = path_state::START; | |
406 | dest.write_bytes(b"~20"); |
|
406 | dest.write_bytes(b"~20"); | |
407 | dest.write_byte(src[i]); |
|
407 | dest.write_byte(src[i]); | |
408 | i += 1; |
|
408 | i += 1; | |
409 | } |
|
409 | } | |
410 | _ => { |
|
410 | _ => { | |
411 | state = path_state::DEFAULT; |
|
411 | state = path_state::DEFAULT; | |
412 | dest.write_byte(b' '); |
|
412 | dest.write_byte(b' '); | |
413 | } |
|
413 | } | |
414 | }, |
|
414 | }, | |
415 | path_state::DEFAULT => { |
|
415 | path_state::DEFAULT => { | |
416 | while i != len && inset(onebyte, src[i]) { |
|
416 | while i != len && inset(onebyte, src[i]) { | |
417 | dest.write_byte(src[i]); |
|
417 | dest.write_byte(src[i]); | |
418 | i += 1; |
|
418 | i += 1; | |
419 | } |
|
419 | } | |
420 | if i == len { |
|
420 | if i == len { | |
421 | break; |
|
421 | break; | |
422 | } |
|
422 | } | |
423 | match src[i] { |
|
423 | match src[i] { | |
424 | b'.' => { |
|
424 | b'.' => { | |
425 | state = path_state::DOT; |
|
425 | state = path_state::DOT; | |
426 | i += 1 |
|
426 | i += 1 | |
427 | } |
|
427 | } | |
428 | b' ' => { |
|
428 | b' ' => { | |
429 | state = path_state::SPACE; |
|
429 | state = path_state::SPACE; | |
430 | i += 1 |
|
430 | i += 1 | |
431 | } |
|
431 | } | |
432 | b'/' => { |
|
432 | b'/' => { | |
433 | state = path_state::START; |
|
433 | state = path_state::START; | |
434 | dest.write_byte(b'/'); |
|
434 | dest.write_byte(b'/'); | |
435 | i += 1; |
|
435 | i += 1; | |
436 | } |
|
436 | } | |
437 | _ => { |
|
437 | _ => { | |
438 | if inset(onebyte, src[i]) { |
|
438 | if inset(onebyte, src[i]) { | |
439 | loop { |
|
439 | loop { | |
440 | dest.write_byte(src[i]); |
|
440 | dest.write_byte(src[i]); | |
441 | i += 1; |
|
441 | i += 1; | |
442 | if !(i < len && inset(onebyte, src[i])) { |
|
442 | if !(i < len && inset(onebyte, src[i])) { | |
443 | break; |
|
443 | break; | |
444 | } |
|
444 | } | |
445 | } |
|
445 | } | |
446 | } else if inset(twobytes, src[i]) { |
|
446 | } else if inset(twobytes, src[i]) { | |
447 | let c = src[i]; |
|
447 | let c = src[i]; | |
448 | i += 1; |
|
448 | i += 1; | |
449 | dest.write_byte(b'_'); |
|
449 | dest.write_byte(b'_'); | |
450 | dest.write_byte(if c == b'_' { |
|
450 | dest.write_byte(if c == b'_' { | |
451 | b'_' |
|
451 | b'_' | |
452 | } else { |
|
452 | } else { | |
453 | c + 32 |
|
453 | c + 32 | |
454 | }); |
|
454 | }); | |
455 | } else { |
|
455 | } else { | |
456 | escape3(dest, src[i]); |
|
456 | escape3(dest, src[i]); | |
457 | i += 1; |
|
457 | i += 1; | |
458 | } |
|
458 | } | |
459 | } |
|
459 | } | |
460 | } |
|
460 | } | |
461 | } |
|
461 | } | |
462 | } |
|
462 | } | |
463 | } |
|
463 | } | |
464 | match state { |
|
464 | match state { | |
465 | path_state::START => (), |
|
465 | path_state::START => (), | |
466 | path_state::A => (), |
|
466 | path_state::A => (), | |
467 | path_state::AU => (), |
|
467 | path_state::AU => (), | |
468 | path_state::THIRD => escape3(dest, src[i - 1]), |
|
468 | path_state::THIRD => escape3(dest, src[i - 1]), | |
469 | path_state::C => (), |
|
469 | path_state::C => (), | |
470 | path_state::CO => (), |
|
470 | path_state::CO => (), | |
471 | path_state::COMLPT => dest.write_byte(src[i - 1]), |
|
471 | path_state::COMLPT => dest.write_byte(src[i - 1]), | |
472 | path_state::COMLPTn => { |
|
472 | path_state::COMLPTn => { | |
473 | escape3(dest, src[i - 2]); |
|
473 | escape3(dest, src[i - 2]); | |
474 | dest.write_byte(src[i - 1]); |
|
474 | dest.write_byte(src[i - 1]); | |
475 | } |
|
475 | } | |
476 | path_state::L => (), |
|
476 | path_state::L => (), | |
477 | path_state::LP => (), |
|
477 | path_state::LP => (), | |
478 | path_state::N => (), |
|
478 | path_state::N => (), | |
479 | path_state::NU => (), |
|
479 | path_state::NU => (), | |
480 | path_state::P => (), |
|
480 | path_state::P => (), | |
481 | path_state::PR => (), |
|
481 | path_state::PR => (), | |
482 | path_state::LDOT => (), |
|
482 | path_state::LDOT => (), | |
483 | path_state::DOT => { |
|
483 | path_state::DOT => { | |
484 | dest.write_bytes(b"~2e"); |
|
484 | dest.write_bytes(b"~2e"); | |
485 | } |
|
485 | } | |
486 | path_state::H => (), |
|
486 | path_state::H => (), | |
487 | path_state::HGDI => (), |
|
487 | path_state::HGDI => (), | |
488 | path_state::SPACE => { |
|
488 | path_state::SPACE => { | |
489 | dest.write_bytes(b"~20"); |
|
489 | dest.write_bytes(b"~20"); | |
490 | } |
|
490 | } | |
491 | path_state::DEFAULT => (), |
|
491 | path_state::DEFAULT => (), | |
492 | } |
|
492 | } | |
493 | } |
|
493 | } | |
494 |
|
494 | |||
495 | fn basic_encode(dest: &mut impl Sink, src: &[u8]) { |
|
495 | fn basic_encode(dest: &mut impl Sink, src: &[u8]) { | |
496 | let twobytes: [u32; 8] = [0, 0, 0x87ff_fffe, 0, 0, 0, 0, 0]; |
|
496 | let twobytes: [u32; 8] = [0, 0, 0x87ff_fffe, 0, 0, 0, 0, 0]; | |
497 | let onebyte: [u32; 8] = |
|
497 | let onebyte: [u32; 8] = | |
498 | [1, 0x2bff_3bfa, 0x6800_0001, 0x2fff_ffff, 0, 0, 0, 0]; |
|
498 | [1, 0x2bff_3bfa, 0x6800_0001, 0x2fff_ffff, 0, 0, 0, 0]; | |
499 | _encode(&twobytes, &onebyte, dest, src, true) |
|
499 | _encode(&twobytes, &onebyte, dest, src, true) | |
500 | } |
|
500 | } | |
501 |
|
501 | |||
/// Longest encoded path, in bytes, that `path_encode` returns without
/// switching to the hashed form; also the length budget of a hashed path.
const MAXSTOREPATHLEN: usize = 120;
503 |
|
503 | |||
504 | fn lower_encode(dest: &mut impl Sink, src: &[u8]) { |
|
504 | fn lower_encode(dest: &mut impl Sink, src: &[u8]) { | |
505 | let onebyte: [u32; 8] = |
|
505 | let onebyte: [u32; 8] = | |
506 | [1, 0x2bff_fbfb, 0xe800_0001, 0x2fff_ffff, 0, 0, 0, 0]; |
|
506 | [1, 0x2bff_fbfb, 0xe800_0001, 0x2fff_ffff, 0, 0, 0, 0]; | |
507 | let lower: [u32; 8] = [0, 0, 0x07ff_fffe, 0, 0, 0, 0, 0]; |
|
507 | let lower: [u32; 8] = [0, 0, 0x07ff_fffe, 0, 0, 0, 0, 0]; | |
508 | for c in src { |
|
508 | for c in src { | |
509 | if inset(&onebyte, *c) { |
|
509 | if inset(&onebyte, *c) { | |
510 | dest.write_byte(*c) |
|
510 | dest.write_byte(*c) | |
511 | } else if inset(&lower, *c) { |
|
511 | } else if inset(&lower, *c) { | |
512 | dest.write_byte(*c + 32) |
|
512 | dest.write_byte(*c + 32) | |
513 | } else { |
|
513 | } else { | |
514 | escape3(dest, *c) |
|
514 | escape3(dest, *c) | |
515 | } |
|
515 | } | |
516 | } |
|
516 | } | |
517 | } |
|
517 | } | |
518 |
|
518 | |||
519 | fn aux_encode(dest: &mut impl Sink, src: &[u8]) { |
|
519 | fn aux_encode(dest: &mut impl Sink, src: &[u8]) { | |
520 | let twobytes = [0; 8]; |
|
520 | let twobytes = [0; 8]; | |
521 | let onebyte: [u32; 8] = [!0, 0xffff_3ffe, !0, !0, !0, !0, !0, !0]; |
|
521 | let onebyte: [u32; 8] = [!0, 0xffff_3ffe, !0, !0, !0, !0, !0, !0]; | |
522 | _encode(&twobytes, &onebyte, dest, src, false) |
|
522 | _encode(&twobytes, &onebyte, dest, src, false) | |
523 | } |
|
523 | } | |
524 |
|
524 | |||
525 | fn hash_mangle(src: &[u8], sha: &[u8]) -> Vec<u8> { |
|
525 | fn hash_mangle(src: &[u8], sha: &[u8]) -> Vec<u8> { | |
526 | let dirprefixlen = 8; |
|
526 | let dirprefixlen = 8; | |
527 | let maxshortdirslen = 68; |
|
527 | let maxshortdirslen = 68; | |
528 |
|
528 | |||
529 | let last_slash = src.iter().rposition(|b| *b == b'/'); |
|
529 | let last_slash = src.iter().rposition(|b| *b == b'/'); | |
530 | let basename_start = match last_slash { |
|
530 | let basename_start = match last_slash { | |
531 | Some(slash) => slash + 1, |
|
531 | Some(slash) => slash + 1, | |
532 | None => 0, |
|
532 | None => 0, | |
533 | }; |
|
533 | }; | |
534 | let basename = &src[basename_start..]; |
|
534 | let basename = &src[basename_start..]; | |
535 | let ext = match basename.iter().rposition(|b| *b == b'.') { |
|
535 | let ext = match basename.iter().rposition(|b| *b == b'.') { | |
536 | None => &[], |
|
536 | None => &[], | |
537 | Some(dot) => &basename[dot..], |
|
537 | Some(dot) => &basename[dot..], | |
538 | }; |
|
538 | }; | |
539 |
|
539 | |||
540 | let mut dest = Vec::with_capacity(MAXSTOREPATHLEN); |
|
540 | let mut dest = Vec::with_capacity(MAXSTOREPATHLEN); | |
541 | dest.write_bytes(b"dh/"); |
|
541 | dest.write_bytes(b"dh/"); | |
542 |
|
542 | |||
543 | if let Some(last_slash) = last_slash { |
|
543 | if let Some(last_slash) = last_slash { | |
544 | for slice in src[..last_slash].split(|b| *b == b'/') { |
|
544 | for slice in src[..last_slash].split(|b| *b == b'/') { | |
545 | let slice = &slice[..std::cmp::min(slice.len(), dirprefixlen)]; |
|
545 | let slice = &slice[..std::cmp::min(slice.len(), dirprefixlen)]; | |
546 | if dest.len() + slice.len() > maxshortdirslen + 3 { |
|
546 | if dest.len() + slice.len() > maxshortdirslen + 3 { | |
547 | break; |
|
547 | break; | |
548 | } else { |
|
548 | } else { | |
549 | dest.write_bytes(slice); |
|
549 | let last_char = slice[slice.len() - 1]; | |
|
550 | if last_char == b'.' || last_char == b' ' { | |||
|
551 | dest.write_bytes(&slice[0..slice.len() - 1]); | |||
|
552 | dest.write_byte(b'_'); | |||
|
553 | } else { | |||
|
554 | dest.write_bytes(slice); | |||
|
555 | } | |||
550 | } |
|
556 | } | |
551 | dest.write_byte(b'/'); |
|
557 | dest.write_byte(b'/'); | |
552 | } |
|
558 | } | |
553 | } |
|
559 | } | |
554 |
|
560 | |||
555 | let used = dest.len() + 40 + ext.len(); |
|
561 | let used = dest.len() + 40 + ext.len(); | |
556 |
|
562 | |||
557 | if MAXSTOREPATHLEN > used { |
|
563 | if MAXSTOREPATHLEN > used { | |
558 | let slop = MAXSTOREPATHLEN - used; |
|
564 | let slop = MAXSTOREPATHLEN - used; | |
559 | let len = std::cmp::min(basename.len(), slop); |
|
565 | let len = std::cmp::min(basename.len(), slop); | |
560 | dest.write_bytes(&basename[..len]) |
|
566 | dest.write_bytes(&basename[..len]) | |
561 | } |
|
567 | } | |
562 | for c in sha { |
|
568 | for c in sha { | |
563 | hexencode(&mut dest, *c); |
|
569 | hexencode(&mut dest, *c); | |
564 | } |
|
570 | } | |
565 | dest.write_bytes(ext); |
|
571 | dest.write_bytes(ext); | |
566 | dest.shrink_to_fit(); |
|
572 | dest.shrink_to_fit(); | |
567 | dest |
|
573 | dest | |
568 | } |
|
574 | } | |
569 |
|
575 | |||
570 | fn hash_encode(src: &[u8]) -> Vec<u8> { |
|
576 | fn hash_encode(src: &[u8]) -> Vec<u8> { | |
571 | let mut dired: DestArr<MAXENCODE> = DestArr::create(); |
|
577 | let mut dired: DestArr<MAXENCODE> = DestArr::create(); | |
572 | let mut lowered: DestArr<MAXENCODE> = DestArr::create(); |
|
578 | let mut lowered: DestArr<MAXENCODE> = DestArr::create(); | |
573 | let mut auxed: DestArr<MAXENCODE> = DestArr::create(); |
|
579 | let mut auxed: DestArr<MAXENCODE> = DestArr::create(); | |
574 | let baselen = (src.len() - 5) * 3; |
|
580 | let baselen = (src.len() - 5) * 3; | |
575 | if baselen >= MAXENCODE { |
|
581 | if baselen >= MAXENCODE { | |
576 | panic!("path_encode::hash_encore: string too long: {}", baselen) |
|
582 | panic!("path_encode::hash_encore: string too long: {}", baselen) | |
577 | }; |
|
583 | }; | |
578 | encode_dir(&mut dired, src); |
|
584 | encode_dir(&mut dired, src); | |
579 | let sha = Sha1::digest(dired.contents()); |
|
585 | let sha = Sha1::digest(dired.contents()); | |
580 | lower_encode(&mut lowered, &dired.contents()[5..]); |
|
586 | lower_encode(&mut lowered, &dired.contents()[5..]); | |
581 | aux_encode(&mut auxed, lowered.contents()); |
|
587 | aux_encode(&mut auxed, lowered.contents()); | |
582 | hash_mangle(auxed.contents(), &sha) |
|
588 | hash_mangle(auxed.contents(), &sha) | |
583 | } |
|
589 | } | |
584 |
|
590 | |||
585 | pub fn path_encode(path: &[u8]) -> Vec<u8> { |
|
591 | pub fn path_encode(path: &[u8]) -> Vec<u8> { | |
586 | let newlen = if path.len() <= MAXSTOREPATHLEN { |
|
592 | let newlen = if path.len() <= MAXSTOREPATHLEN { | |
587 | let mut measure = MeasureDest::create(); |
|
593 | let mut measure = MeasureDest::create(); | |
588 | basic_encode(&mut measure, path); |
|
594 | basic_encode(&mut measure, path); | |
589 | measure.len |
|
595 | measure.len | |
590 | } else { |
|
596 | } else { | |
591 | return hash_encode(path); |
|
597 | return hash_encode(path); | |
592 | }; |
|
598 | }; | |
593 | if newlen <= MAXSTOREPATHLEN { |
|
599 | if newlen <= MAXSTOREPATHLEN { | |
594 | if newlen == path.len() { |
|
600 | if newlen == path.len() { | |
595 | path.to_vec() |
|
601 | path.to_vec() | |
596 | } else { |
|
602 | } else { | |
597 | let mut dest = Vec::with_capacity(newlen); |
|
603 | let mut dest = Vec::with_capacity(newlen); | |
598 | basic_encode(&mut dest, path); |
|
604 | basic_encode(&mut dest, path); | |
599 | assert!(dest.len() == newlen); |
|
605 | assert!(dest.len() == newlen); | |
600 | dest |
|
606 | dest | |
601 | } |
|
607 | } | |
602 | } else { |
|
608 | } else { | |
603 | hash_encode(path) |
|
609 | hash_encode(path) | |
604 | } |
|
610 | } | |
605 | } |
|
611 | } | |
606 |
|
612 | |||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::utils::hg_path::HgPathBuf;

    /// A directory name that ends in '.' after being cut to 8 bytes must
    /// have that '.' replaced by '_' in the hashed path.
    #[test]
    fn test_dirname_ends_with_underscore() {
        let input = b"data/dir1234.foo/ABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJ.i";
        let expected = b"dh/dir1234_/abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij.if2e9ce59e095eff5f8f334dc809e65606a0aa50b.i";
        let res = path_encode(input);
        assert_eq!(
            HgPathBuf::from_bytes(&res),
            HgPathBuf::from_bytes(expected)
        );
    }

    /// A long file name directly under the 5-byte prefix, with no
    /// directory components, is hashed.
    #[test]
    fn test_long_filename_at_root() {
        let input = b"data/ABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJ.i";
        let expected = b"dh/abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij.i708243a2237a7afae259ea3545a72a2ef11c247b.i";
        let res = path_encode(input);
        assert_eq!(
            HgPathBuf::from_bytes(&res),
            HgPathBuf::from_bytes(expected)
        );
    }
}
General Comments 0
You need to be logged in to leave comments.
Login now