##// END OF EJS Templates
rhg: in path_encode, use Vec directly instead of VecDest...
Arseniy Alekseyev -
r51059:8e50aa0d default
parent child Browse files
Show More
@@ -1,646 +1,634 b''
1 1 use sha1::{Digest, Sha1};
2 2
/// States of the byte-by-byte scanner in `_encode`.
///
/// Most states record how much of a reserved name ("aux", "con", "comN",
/// "lptN", "nul", "prn") or of a ".hg" / ".d" / ".i" suffix has been seen so
/// far in the current path component.
#[derive(PartialEq, Debug)]
// Type and variant names are kept exactly as the rest of the file spells
// them, hence the two lint exemptions below.
#[allow(non_camel_case_types)]
#[allow(clippy::upper_case_acronyms)]
enum path_state {
    /// First byte of a path component.
    START,
    /// Seen "a"; possibly the start of "aux".
    A,
    /// Seen "au".
    AU,
    /// Third byte of a 3-byte sequence, e.g. "aux", "nul".
    THIRD,
    /// Seen "c"; possibly the start of "con" or "comN".
    C,
    /// Seen "co".
    CO,
    /// Seen "com" or "lpt"; a digit may follow.
    COMLPT,
    /// Seen "com" or "lpt" followed by a digit `1`..=`9`.
    COMLPTn,
    /// Seen "l"; possibly the start of "lptN".
    L,
    /// Seen "lp".
    LP,
    /// Seen "n"; possibly the start of "nul".
    N,
    /// Seen "nu".
    NU,
    /// Seen "p"; possibly the start of "prn".
    P,
    /// Seen "pr".
    PR,
    /// Leading '.' of a path component.
    LDOT,
    /// '.' in a non-leading position.
    DOT,
    /// Seen ".h".
    H,
    /// Seen ".hg", ".d", or ".i".
    HGDI,
    /// Seen a space in a non-leading position.
    SPACE,
    /// Byte of a path component after the first.
    DEFAULT,
}
28 28
/// State machine for dir-encoding (driven by `encode_dir`).
// Type and variant names are kept exactly as the rest of the file spells
// them, hence the two lint exemptions below.
#[allow(non_camel_case_types)]
#[allow(clippy::upper_case_acronyms)]
enum dir_state {
    /// The previous byte was '.'.
    DDOT,
    /// Seen ".h".
    DH,
    /// Seen ".hg", ".d", or ".i".
    DHGDI,
    /// Any other position.
    DDEFAULT,
}
38 38
/// Destination for encoded bytes.
///
/// The encoders in this file are generic over `Sink`, so the same code can
/// fill a buffer or merely measure the output length.
trait Sink {
    /// Appends the single byte `c`.
    fn write_byte(&mut self, c: u8);
    /// Appends every byte of `src`, in order.
    fn write_bytes(&mut self, src: &[u8]);
}
43 43
/// Returns `true` when byte `c` is a member of `bitset`.
///
/// `bitset` is a 256-bit set stored as eight 32-bit words: byte `c` lives in
/// word `c / 32`, at bit `c % 32`.
fn inset(bitset: &[u32; 8], c: u8) -> bool {
    let word = bitset[usize::from(c >> 5)];
    let mask = 1u32 << (c & 31);
    word & mask != 0
}
47 47
/// Capacity, in bytes, of each fixed-size `DestArr` scratch buffer used by
/// `hash_encode`, and the limit its input-length check compares against.
const MAXENCODE: usize = 4096 * 4;
49 49
/// Fixed-capacity byte buffer holding up to `N` bytes.
///
/// Only the first `len` bytes of `buf` are meaningful.
struct DestArr<const N: usize> {
    buf: [u8; N],
    pub len: usize,
}

impl<const N: usize> DestArr<N> {
    /// Creates an empty, zero-filled buffer.
    pub fn create() -> Self {
        DestArr {
            buf: [0; N],
            len: 0,
        }
    }

    /// Returns the bytes written so far.
    pub fn contents(&self) -> &[u8] {
        &self.buf[..self.len]
    }
}
67 67
68 68 impl<const N: usize> Sink for DestArr<N> {
69 69 fn write_byte(&mut self, c: u8) {
70 70 self.buf[self.len] = c;
71 71 self.len += 1;
72 72 }
73 73
74 74 fn write_bytes(&mut self, src: &[u8]) {
75 75 self.buf[self.len..self.len + src.len()].copy_from_slice(src);
76 76 self.len += src.len();
77 77 }
78 78 }
79 79
/// Sink that stores nothing and only counts the bytes written to it.
struct MeasureDest {
    /// Number of bytes written so far.
    pub len: usize,
}
87 83
88 impl VecDest {
89 pub fn create(capacity : usize) -> Self {
90 Self {
91 buf: Vec::with_capacity(capacity),
92 }
93 }
94 }
95
96 impl Sink for VecDest {
84 impl Sink for Vec<u8> {
97 85 fn write_byte(&mut self, c: u8) {
98 self.buf.push(c)
86 self.push(c)
99 87 }
100 88
101 89 fn write_bytes(&mut self, src: &[u8]) {
102 self.buf.extend_from_slice(src)
90 self.extend_from_slice(src)
103 91 }
104 92 }
105 93
106 94 impl MeasureDest {
107 95 fn create() -> Self {
108 96 Self { len: 0 }
109 97 }
110 98 }
111 99
112 100 impl Sink for MeasureDest {
113 101 fn write_byte(&mut self, _c: u8) {
114 102 self.len += 1;
115 103 }
116 104
117 105 fn write_bytes(&mut self, src: &[u8]) {
118 106 self.len += src.len();
119 107 }
120 108 }
121 109
122 110 fn hexencode(dest: &mut impl Sink, c: u8) {
123 111 let hexdigit = b"0123456789abcdef";
124 112 dest.write_byte(hexdigit[(c as usize) >> 4]);
125 113 dest.write_byte(hexdigit[(c as usize) & 15]);
126 114 }
127 115
128 116 /* 3-byte escape: tilde followed by two hex digits */
129 117 fn escape3(dest: &mut impl Sink, c: u8) {
130 118 dest.write_byte(b'~');
131 119 hexencode(dest, c);
132 120 }
133 121
134 122 fn encode_dir(dest: &mut impl Sink, src: &[u8]) {
135 123 let mut state = dir_state::DDEFAULT;
136 124 let mut i = 0;
137 125
138 126 while i < src.len() {
139 127 match state {
140 128 dir_state::DDOT => match src[i] {
141 129 b'd' | b'i' => {
142 130 state = dir_state::DHGDI;
143 131 dest.write_byte(src[i]);
144 132 i += 1;
145 133 }
146 134 b'h' => {
147 135 state = dir_state::DH;
148 136 dest.write_byte(src[i]);
149 137 i += 1;
150 138 }
151 139 _ => {
152 140 state = dir_state::DDEFAULT;
153 141 }
154 142 },
155 143 dir_state::DH => {
156 144 if src[i] == b'g' {
157 145 state = dir_state::DHGDI;
158 146 dest.write_byte(src[i]);
159 147 i += 1;
160 148 } else {
161 149 state = dir_state::DDEFAULT;
162 150 }
163 151 }
164 152 dir_state::DHGDI => {
165 153 if src[i] == b'/' {
166 154 dest.write_bytes(b".hg");
167 155 dest.write_byte(src[i]);
168 156 i += 1;
169 157 }
170 158 state = dir_state::DDEFAULT;
171 159 }
172 160 dir_state::DDEFAULT => {
173 161 if src[i] == b'.' {
174 162 state = dir_state::DDOT
175 163 }
176 164 dest.write_byte(src[i]);
177 165 i += 1;
178 166 }
179 167 }
180 168 }
181 169 }
182 170
183 171 fn _encode(
184 172 twobytes: &[u32; 8],
185 173 onebyte: &[u32; 8],
186 174 dest: &mut impl Sink,
187 175 src: &[u8],
188 176 encodedir: bool,
189 177 ) {
190 178 let mut state = path_state::START;
191 179 let mut i = 0;
192 180 let len = src.len();
193 181
194 182 while i < len {
195 183 match state {
196 184 path_state::START => match src[i] {
197 185 b'/' => {
198 186 dest.write_byte(src[i]);
199 187 i += 1;
200 188 }
201 189 b'.' => {
202 190 state = path_state::LDOT;
203 191 escape3(dest, src[i]);
204 192 i += 1;
205 193 }
206 194 b' ' => {
207 195 state = path_state::DEFAULT;
208 196 escape3(dest, src[i]);
209 197 i += 1;
210 198 }
211 199 b'a' => {
212 200 state = path_state::A;
213 201 dest.write_byte(src[i]);
214 202 i += 1;
215 203 }
216 204 b'c' => {
217 205 state = path_state::C;
218 206 dest.write_byte(src[i]);
219 207 i += 1;
220 208 }
221 209 b'l' => {
222 210 state = path_state::L;
223 211 dest.write_byte(src[i]);
224 212 i += 1;
225 213 }
226 214 b'n' => {
227 215 state = path_state::N;
228 216 dest.write_byte(src[i]);
229 217 i += 1;
230 218 }
231 219 b'p' => {
232 220 state = path_state::P;
233 221 dest.write_byte(src[i]);
234 222 i += 1;
235 223 }
236 224 _ => {
237 225 state = path_state::DEFAULT;
238 226 }
239 227 },
240 228 path_state::A => {
241 229 if src[i] == b'u' {
242 230 state = path_state::AU;
243 231 dest.write_byte(src[i]);
244 232 i += 1;
245 233 } else {
246 234 state = path_state::DEFAULT;
247 235 }
248 236 }
249 237 path_state::AU => {
250 238 if src[i] == b'x' {
251 239 state = path_state::THIRD;
252 240 i += 1;
253 241 } else {
254 242 state = path_state::DEFAULT;
255 243 }
256 244 }
257 245 path_state::THIRD => {
258 246 state = path_state::DEFAULT;
259 247 match src[i] {
260 248 b'.' | b'/' | b'\0' => escape3(dest, src[i - 1]),
261 249 _ => i -= 1,
262 250 }
263 251 }
264 252 path_state::C => {
265 253 if src[i] == b'o' {
266 254 state = path_state::CO;
267 255 dest.write_byte(src[i]);
268 256 i += 1;
269 257 } else {
270 258 state = path_state::DEFAULT;
271 259 }
272 260 }
273 261 path_state::CO => {
274 262 if src[i] == b'm' {
275 263 state = path_state::COMLPT;
276 264 i += 1;
277 265 } else if src[i] == b'n' {
278 266 state = path_state::THIRD;
279 267 i += 1;
280 268 } else {
281 269 state = path_state::DEFAULT;
282 270 }
283 271 }
284 272 path_state::COMLPT => {
285 273 if src[i] >= b'1' && src[i] <= b'9' {
286 274 state = path_state::COMLPTn;
287 275 i += 1;
288 276 } else {
289 277 state = path_state::DEFAULT;
290 278 dest.write_byte(src[i - 1]);
291 279 }
292 280 }
293 281 path_state::COMLPTn => {
294 282 state = path_state::DEFAULT;
295 283 match src[i] {
296 284 b'.' | b'/' | b'\0' => {
297 285 escape3(dest, src[i - 2]);
298 286 dest.write_byte(src[i - 1]);
299 287 }
300 288 _ => {
301 289 dest.write_bytes(&src[i - 2..i]);
302 290 }
303 291 }
304 292 }
305 293 path_state::L => {
306 294 if src[i] == b'p' {
307 295 state = path_state::LP;
308 296 dest.write_byte(src[i]);
309 297 i += 1;
310 298 } else {
311 299 state = path_state::DEFAULT;
312 300 }
313 301 }
314 302 path_state::LP => {
315 303 if src[i] == b't' {
316 304 state = path_state::COMLPT;
317 305 i += 1;
318 306 } else {
319 307 state = path_state::DEFAULT;
320 308 }
321 309 }
322 310 path_state::N => {
323 311 if src[i] == b'u' {
324 312 state = path_state::NU;
325 313 dest.write_byte(src[i]);
326 314 i += 1;
327 315 } else {
328 316 state = path_state::DEFAULT;
329 317 }
330 318 }
331 319 path_state::NU => {
332 320 if src[i] == b'l' {
333 321 state = path_state::THIRD;
334 322 i += 1;
335 323 } else {
336 324 state = path_state::DEFAULT;
337 325 }
338 326 }
339 327 path_state::P => {
340 328 if src[i] == b'r' {
341 329 state = path_state::PR;
342 330 dest.write_byte(src[i]);
343 331 i += 1;
344 332 } else {
345 333 state = path_state::DEFAULT;
346 334 }
347 335 }
348 336 path_state::PR => {
349 337 if src[i] == b'n' {
350 338 state = path_state::THIRD;
351 339 i += 1;
352 340 } else {
353 341 state = path_state::DEFAULT;
354 342 }
355 343 }
356 344 path_state::LDOT => match src[i] {
357 345 b'd' | b'i' => {
358 346 state = path_state::HGDI;
359 347 dest.write_byte(src[i]);
360 348 i += 1;
361 349 }
362 350 b'h' => {
363 351 state = path_state::H;
364 352 dest.write_byte(src[i]);
365 353 i += 1;
366 354 }
367 355 _ => {
368 356 state = path_state::DEFAULT;
369 357 }
370 358 },
371 359 path_state::DOT => match src[i] {
372 360 b'/' | b'\0' => {
373 361 state = path_state::START;
374 362 dest.write_bytes(b"~2e");
375 363 dest.write_byte(src[i]);
376 364 i += 1;
377 365 }
378 366 b'd' | b'i' => {
379 367 state = path_state::HGDI;
380 368 dest.write_byte(b'.');
381 369 dest.write_byte(src[i]);
382 370 i += 1;
383 371 }
384 372 b'h' => {
385 373 state = path_state::H;
386 374 dest.write_bytes(b".h");
387 375 i += 1;
388 376 }
389 377 _ => {
390 378 state = path_state::DEFAULT;
391 379 dest.write_byte(b'.');
392 380 }
393 381 },
394 382 path_state::H => {
395 383 if src[i] == b'g' {
396 384 state = path_state::HGDI;
397 385 dest.write_byte(src[i]);
398 386 i += 1;
399 387 } else {
400 388 state = path_state::DEFAULT;
401 389 }
402 390 }
403 391 path_state::HGDI => {
404 392 if src[i] == b'/' {
405 393 state = path_state::START;
406 394 if encodedir {
407 395 dest.write_bytes(b".hg");
408 396 }
409 397 dest.write_byte(src[i]);
410 398 i += 1
411 399 } else {
412 400 state = path_state::DEFAULT;
413 401 }
414 402 }
415 403 path_state::SPACE => match src[i] {
416 404 b'/' | b'\0' => {
417 405 state = path_state::START;
418 406 dest.write_bytes(b"~20");
419 407 dest.write_byte(src[i]);
420 408 i += 1;
421 409 }
422 410 _ => {
423 411 state = path_state::DEFAULT;
424 412 dest.write_byte(b' ');
425 413 }
426 414 },
427 415 path_state::DEFAULT => {
428 416 while i != len && inset(onebyte, src[i]) {
429 417 dest.write_byte(src[i]);
430 418 i += 1;
431 419 }
432 420 if i == len {
433 421 break;
434 422 }
435 423 match src[i] {
436 424 b'.' => {
437 425 state = path_state::DOT;
438 426 i += 1
439 427 }
440 428 b' ' => {
441 429 state = path_state::SPACE;
442 430 i += 1
443 431 }
444 432 b'/' => {
445 433 state = path_state::START;
446 434 dest.write_byte(b'/');
447 435 i += 1;
448 436 }
449 437 _ => {
450 438 if inset(onebyte, src[i]) {
451 439 loop {
452 440 dest.write_byte(src[i]);
453 441 i += 1;
454 442 if !(i < len && inset(onebyte, src[i])) {
455 443 break;
456 444 }
457 445 }
458 446 } else if inset(twobytes, src[i]) {
459 447 let c = src[i];
460 448 i += 1;
461 449 dest.write_byte(b'_');
462 450 dest.write_byte(if c == b'_' {
463 451 b'_'
464 452 } else {
465 453 c + 32
466 454 });
467 455 } else {
468 456 escape3(dest, src[i]);
469 457 i += 1;
470 458 }
471 459 }
472 460 }
473 461 }
474 462 }
475 463 }
476 464 match state {
477 465 path_state::START => (),
478 466 path_state::A => (),
479 467 path_state::AU => (),
480 468 path_state::THIRD => escape3(dest, src[i - 1]),
481 469 path_state::C => (),
482 470 path_state::CO => (),
483 471 path_state::COMLPT => dest.write_byte(src[i - 1]),
484 472 path_state::COMLPTn => {
485 473 escape3(dest, src[i - 2]);
486 474 dest.write_byte(src[i - 1]);
487 475 }
488 476 path_state::L => (),
489 477 path_state::LP => (),
490 478 path_state::N => (),
491 479 path_state::NU => (),
492 480 path_state::P => (),
493 481 path_state::PR => (),
494 482 path_state::LDOT => (),
495 483 path_state::DOT => {
496 484 dest.write_bytes(b"~2e");
497 485 }
498 486 path_state::H => (),
499 487 path_state::HGDI => (),
500 488 path_state::SPACE => {
501 489 dest.write_bytes(b"~20");
502 490 }
503 491 path_state::DEFAULT => (),
504 492 }
505 493 }
506 494
507 495 fn basic_encode(dest: &mut impl Sink, src: &[u8]) {
508 496 let twobytes: [u32; 8] = [0, 0, 0x87ff_fffe, 0, 0, 0, 0, 0];
509 497 let onebyte: [u32; 8] =
510 498 [1, 0x2bff_3bfa, 0x6800_0001, 0x2fff_ffff, 0, 0, 0, 0];
511 499 _encode(&twobytes, &onebyte, dest, src, true)
512 500 }
513 501
/// Longest encoded path, in bytes, that `path_encode` returns without
/// switching to the hashed form.
const MAXSTOREPATHLEN: usize = 120;
515 503
516 504 fn lower_encode(dest: &mut impl Sink, src: &[u8]) {
517 505 let onebyte: [u32; 8] =
518 506 [1, 0x2bff_fbfb, 0xe800_0001, 0x2fff_ffff, 0, 0, 0, 0];
519 507 let lower: [u32; 8] = [0, 0, 0x07ff_fffe, 0, 0, 0, 0, 0];
520 508 for c in src {
521 509 if inset(&onebyte, *c) {
522 510 dest.write_byte(*c)
523 511 } else if inset(&lower, *c) {
524 512 dest.write_byte(*c + 32)
525 513 } else {
526 514 escape3(dest, *c)
527 515 }
528 516 }
529 517 }
530 518
531 519 fn aux_encode(dest: &mut impl Sink, src: &[u8]) {
532 520 let twobytes = [0; 8];
533 521 let onebyte: [u32; 8] = [!0, 0xffff_3ffe, !0, !0, !0, !0, !0, !0];
534 522 _encode(&twobytes, &onebyte, dest, src, false)
535 523 }
536 524
537 525 fn hash_mangle(src: &[u8], sha: &[u8]) -> Vec<u8> {
538 526 let dirprefixlen = 8;
539 527 let maxshortdirslen = 68;
540 528
541 529 let last_slash = src.iter().rposition(|b| *b == b'/');
542 530 let last_dot: Option<usize> = {
543 531 let s = last_slash.unwrap_or(0);
544 532 src[s..].iter().rposition(|b| *b == b'.').map(|i| i + s)
545 533 };
546 534
547 let mut dest : VecDest = VecDest::create(MAXSTOREPATHLEN);
535 let mut dest = Vec::with_capacity(MAXSTOREPATHLEN);
548 536 dest.write_bytes(b"dh/");
549 537
550 538 if let Some(last_slash) = last_slash {
551 539 for slice in src[..last_slash].split(|b| *b == b'/') {
552 540 let slice = &slice[..std::cmp::min(slice.len(), dirprefixlen)];
553 if dest.buf.len() + slice.len() > maxshortdirslen + 3 {
541 if dest.len() + slice.len() > maxshortdirslen + 3 {
554 542 break;
555 543 } else {
556 544 dest.write_bytes(slice);
557 545 }
558 546 dest.write_byte(b'/');
559 547 }
560 548 }
561 549
562 let used = dest.buf.len() + 40 + {
550 let used = dest.len() + 40 + {
563 551 if let Some(l) = last_dot {
564 552 src.len() - l
565 553 } else {
566 554 0
567 555 }
568 556 };
569 557
570 558 if MAXSTOREPATHLEN > used {
571 559 let slop = MAXSTOREPATHLEN - used;
572 560 let basenamelen = match last_slash {
573 561 Some(l) => src.len() - l - 1,
574 562 None => src.len(),
575 563 };
576 564 let basenamelen = std::cmp::min(basenamelen, slop);
577 565 if basenamelen > 0 {
578 566 let start = match last_slash {
579 567 Some(l) => l + 1,
580 568 None => 0,
581 569 };
582 570 dest.write_bytes(&src[start..][..basenamelen])
583 571 }
584 572 }
585 573 for c in sha {
586 574 hexencode(&mut dest, *c);
587 575 }
588 576 if let Some(l) = last_dot {
589 577 dest.write_bytes(&src[l..]);
590 578 }
591 dest.buf
579 dest
592 580 }
593 581
594 582 fn hash_encode(src: &[u8]) -> Vec<u8> {
595 583 let mut dired: DestArr<MAXENCODE> = DestArr::create();
596 584 let mut lowered: DestArr<MAXENCODE> = DestArr::create();
597 585 let mut auxed: DestArr<MAXENCODE> = DestArr::create();
598 586 let baselen = (src.len() - 5) * 3;
599 587 if baselen >= MAXENCODE {
600 588 panic!("path_encode::hash_encore: string too long: {}", baselen)
601 589 };
602 590 encode_dir(&mut dired, src);
603 591 let sha = Sha1::digest(dired.contents());
604 592 lower_encode(&mut lowered, &dired.contents()[5..]);
605 593 aux_encode(&mut auxed, lowered.contents());
606 594 hash_mangle(auxed.contents(), &sha)
607 595 }
608 596
609 597 pub fn path_encode(path: &[u8]) -> Vec<u8> {
610 598 let newlen = if path.len() <= MAXSTOREPATHLEN {
611 599 let mut measure = MeasureDest::create();
612 600 basic_encode(&mut measure, path);
613 601 measure.len
614 602 } else {
615 603 MAXSTOREPATHLEN + 1
616 604 };
617 605 if newlen <= MAXSTOREPATHLEN {
618 606 if newlen == path.len() {
619 607 path.to_vec()
620 608 } else {
621 let mut dest = VecDest::create(newlen);
609 let mut dest = Vec::with_capacity(newlen);
622 610 basic_encode(&mut dest, path);
623 assert!(dest.buf.len() == newlen);
624 dest.buf
611 assert!(dest.len() == newlen);
612 dest
625 613 }
626 614 } else {
627 615 hash_encode(path)
628 616 }
629 617 }
630 618
#[cfg(test)]
mod tests {
    use super::*;
    use crate::utils::hg_path::HgPathBuf;

    /// A long upper-case filename directly under `data/` takes the hashed
    /// ("dh/") route.
    #[test]
    fn test_long_filename_at_root() {
        let input = b"data/ABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJ.i";
        let expected = b"dh/abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij.i708243a2237a7afae259ea3545a72a2ef11c247b.i";
        let res = path_encode(input);
        assert_eq!(
            HgPathBuf::from_bytes(&res),
            HgPathBuf::from_bytes(expected)
        );
    }
}
General Comments 0
You need to be logged in to leave comments. Login now