##// END OF EJS Templates
rhg: in path_encode, make DestArr generic over its size
Arseniy Alekseyev -
r51056:5d7ba99d default
parent child Browse files
Show More
@@ -1,655 +1,655 b''
1 1 use sha1::{Digest, Sha1};
2 2
/// States of the per-component state machine driven by `_encode`.
///
/// Several states mean "some bytes have been read but not yet written";
/// those pending bytes are emitted (escaped or verbatim) once the following
/// byte, or the end of input, is known.
#[derive(PartialEq, Debug)]
#[allow(non_camel_case_types)]
#[allow(clippy::upper_case_acronyms)]
enum path_state {
    /// At the first byte of a path component.
    START,
    /// Component starts with "a" (candidate for "aux").
    A,
    /// Component starts with "au".
    AU,
    /// The third byte of a three-letter candidate ("aux", "con", "nul",
    /// "prn") has been read but not written yet.
    THIRD,
    /// Component starts with "c" (candidate for "con" or "comN").
    C,
    /// Component starts with "co".
    CO,
    /// "com" or "lpt" has been read; the third letter is still pending.
    COMLPT,
    /// "com"/"lpt" followed by a digit 1-9; the last two bytes are pending.
    COMLPTn,
    /// Component starts with "l" (candidate for "lptN").
    L,
    /// Component starts with "lp".
    LP,
    /// Component starts with "n" (candidate for "nul").
    N,
    /// Component starts with "nu".
    NU,
    /// Component starts with "p" (candidate for "prn").
    P,
    /// Component starts with "pr".
    PR,
    /// A leading '.' has just been escaped.
    LDOT,
    /// A '.' in a non-leading position has been read but not written yet.
    DOT,
    /// ".h" has been seen.
    H,
    /// ".hg", ".d" or ".i" has been seen.
    HGDI,
    /// A ' ' in a non-leading position has been read but not written yet.
    SPACE,
    /// Any byte of a path component after the first.
    DEFAULT,
}
28 28
/// States of the directory-encoding state machine driven by `encode_dir`.
#[allow(non_camel_case_types)]
#[allow(clippy::upper_case_acronyms)]
enum dir_state {
    /// A '.' has just been copied.
    DDOT,
    /// ".h" has just been copied.
    DH,
    /// ".hg", ".d" or ".i" has just been copied.
    DHGDI,
    /// Any other position.
    DDEFAULT,
}
38 38
/// Byte-oriented output target for the encoders in this file.
trait Sink {
    /// Appends the single byte `c`.
    fn write_byte(&mut self, c: u8);
    /// Appends every byte of `c`, in order.
    fn write_bytes(&mut self, c: &[u8]);
}
43 43
/// Tests whether byte `c` is a member of the 256-bit set `bitset`.
///
/// The top three bits of `c` select one of the eight `u32` words and the
/// low five bits select the bit inside that word.
fn inset(bitset: &[u32; 8], c: u8) -> bool {
    let word = bitset[usize::from(c >> 5)];
    let mask = 1u32 << (c & 31);
    word & mask != 0
}
47 47
/// Capacity, in bytes, of each scratch buffer used by `hash_encode`.
const MAXENCODE: usize = 4096 * 4;
49 49
/// Fixed-capacity output buffer holding up to `N` bytes inline.
struct DestArr<const N: usize> {
    /// Backing storage; only the first `len` bytes are meaningful.
    buf: [u8; N],
    /// Number of bytes written so far.
    pub len: usize,
}
54 54
55 impl DestArr {
55 impl<const N: usize> DestArr<N> {
56 56 pub fn create() -> Self {
57 57 DestArr {
58 buf: [0; MAXENCODE],
58 buf: [0; N],
59 59 len: 0,
60 60 }
61 61 }
62 62
63 63 pub fn contents(&self) -> &[u8] {
64 64 &self.buf[..self.len]
65 65 }
66 66 }
67 67
68 impl Sink for DestArr {
68 impl<const N: usize> Sink for DestArr<N> {
69 69 fn write_byte(&mut self, c: u8) {
70 70 self.buf[self.len] = c;
71 71 self.len += 1;
72 72 }
73 73
74 74 fn write_bytes(&mut self, src: &[u8]) {
75 75 self.buf[self.len..self.len + src.len()].copy_from_slice(src);
76 76 self.len += src.len();
77 77 }
78 78 }
79 79
/// Output sink that either fills a caller-provided slice or, when no slice
/// is attached, only counts how many bytes would have been written.
struct Dest<'a> {
    /// Target slice, or `None` when only measuring.
    dest: Option<&'a mut [u8]>,
    /// Number of bytes written (or counted) so far.
    pub len: usize,
}
84 84
85 85 impl<'a> Dest<'a> {
86 86 pub fn create(buf: &'a mut [u8]) -> Dest<'a> {
87 87 Dest {
88 88 dest: Some(buf),
89 89 len: 0,
90 90 }
91 91 }
92 92
93 93 pub fn create_measure() -> Dest<'a> {
94 94 Dest { dest: None, len: 0 }
95 95 }
96 96 }
97 97
/// Reborrows the slice inside `x` for the shorter lifetime `'a`, so the
/// caller can write through it without moving it out of the option.
fn rewrap_option<'a, 'b: 'a>(
    x: &'a mut Option<&'b mut [u8]>,
) -> Option<&'a mut [u8]> {
    x.as_deref_mut()
}
106 106
107 107 impl<'a> Sink for Dest<'a> {
108 108 fn write_byte(&mut self, c: u8) {
109 109 if let Some(slice) = rewrap_option(&mut self.dest) {
110 110 slice[self.len] = c
111 111 }
112 112 self.len += 1
113 113 }
114 114
115 115 fn write_bytes(&mut self, src: &[u8]) {
116 116 if let Some(slice) = rewrap_option(&mut self.dest) {
117 117 slice[self.len..self.len + src.len()].copy_from_slice(src)
118 118 }
119 119 self.len += src.len();
120 120 }
121 121 }
122 122
123 123 fn hexencode(dest: &mut impl Sink, c: u8) {
124 124 let hexdigit = b"0123456789abcdef";
125 125 dest.write_byte(hexdigit[(c as usize) >> 4]);
126 126 dest.write_byte(hexdigit[(c as usize) & 15]);
127 127 }
128 128
129 129 /* 3-byte escape: tilde followed by two hex digits */
130 130 fn escape3(dest: &mut impl Sink, c: u8) {
131 131 dest.write_byte(b'~');
132 132 hexencode(dest, c);
133 133 }
134 134
135 135 fn encode_dir(dest: &mut impl Sink, src: &[u8]) {
136 136 let mut state = dir_state::DDEFAULT;
137 137 let mut i = 0;
138 138
139 139 while i < src.len() {
140 140 match state {
141 141 dir_state::DDOT => match src[i] {
142 142 b'd' | b'i' => {
143 143 state = dir_state::DHGDI;
144 144 dest.write_byte(src[i]);
145 145 i += 1;
146 146 }
147 147 b'h' => {
148 148 state = dir_state::DH;
149 149 dest.write_byte(src[i]);
150 150 i += 1;
151 151 }
152 152 _ => {
153 153 state = dir_state::DDEFAULT;
154 154 }
155 155 },
156 156 dir_state::DH => {
157 157 if src[i] == b'g' {
158 158 state = dir_state::DHGDI;
159 159 dest.write_byte(src[i]);
160 160 i += 1;
161 161 } else {
162 162 state = dir_state::DDEFAULT;
163 163 }
164 164 }
165 165 dir_state::DHGDI => {
166 166 if src[i] == b'/' {
167 167 dest.write_bytes(b".hg");
168 168 dest.write_byte(src[i]);
169 169 i += 1;
170 170 }
171 171 state = dir_state::DDEFAULT;
172 172 }
173 173 dir_state::DDEFAULT => {
174 174 if src[i] == b'.' {
175 175 state = dir_state::DDOT
176 176 }
177 177 dest.write_byte(src[i]);
178 178 i += 1;
179 179 }
180 180 }
181 181 }
182 182 }
183 183
184 184 fn _encode(
185 185 twobytes: &[u32; 8],
186 186 onebyte: &[u32; 8],
187 187 dest: &mut impl Sink,
188 188 src: &[u8],
189 189 encodedir: bool,
190 190 ) {
191 191 let mut state = path_state::START;
192 192 let mut i = 0;
193 193 let len = src.len();
194 194
195 195 while i < len {
196 196 match state {
197 197 path_state::START => match src[i] {
198 198 b'/' => {
199 199 dest.write_byte(src[i]);
200 200 i += 1;
201 201 }
202 202 b'.' => {
203 203 state = path_state::LDOT;
204 204 escape3(dest, src[i]);
205 205 i += 1;
206 206 }
207 207 b' ' => {
208 208 state = path_state::DEFAULT;
209 209 escape3(dest, src[i]);
210 210 i += 1;
211 211 }
212 212 b'a' => {
213 213 state = path_state::A;
214 214 dest.write_byte(src[i]);
215 215 i += 1;
216 216 }
217 217 b'c' => {
218 218 state = path_state::C;
219 219 dest.write_byte(src[i]);
220 220 i += 1;
221 221 }
222 222 b'l' => {
223 223 state = path_state::L;
224 224 dest.write_byte(src[i]);
225 225 i += 1;
226 226 }
227 227 b'n' => {
228 228 state = path_state::N;
229 229 dest.write_byte(src[i]);
230 230 i += 1;
231 231 }
232 232 b'p' => {
233 233 state = path_state::P;
234 234 dest.write_byte(src[i]);
235 235 i += 1;
236 236 }
237 237 _ => {
238 238 state = path_state::DEFAULT;
239 239 }
240 240 },
241 241 path_state::A => {
242 242 if src[i] == b'u' {
243 243 state = path_state::AU;
244 244 dest.write_byte(src[i]);
245 245 i += 1;
246 246 } else {
247 247 state = path_state::DEFAULT;
248 248 }
249 249 }
250 250 path_state::AU => {
251 251 if src[i] == b'x' {
252 252 state = path_state::THIRD;
253 253 i += 1;
254 254 } else {
255 255 state = path_state::DEFAULT;
256 256 }
257 257 }
258 258 path_state::THIRD => {
259 259 state = path_state::DEFAULT;
260 260 match src[i] {
261 261 b'.' | b'/' | b'\0' => escape3(dest, src[i - 1]),
262 262 _ => i -= 1,
263 263 }
264 264 }
265 265 path_state::C => {
266 266 if src[i] == b'o' {
267 267 state = path_state::CO;
268 268 dest.write_byte(src[i]);
269 269 i += 1;
270 270 } else {
271 271 state = path_state::DEFAULT;
272 272 }
273 273 }
274 274 path_state::CO => {
275 275 if src[i] == b'm' {
276 276 state = path_state::COMLPT;
277 277 i += 1;
278 278 } else if src[i] == b'n' {
279 279 state = path_state::THIRD;
280 280 i += 1;
281 281 } else {
282 282 state = path_state::DEFAULT;
283 283 }
284 284 }
285 285 path_state::COMLPT => {
286 286 if src[i] >= b'1' && src[i] <= b'9' {
287 287 state = path_state::COMLPTn;
288 288 i += 1;
289 289 } else {
290 290 state = path_state::DEFAULT;
291 291 dest.write_byte(src[i - 1]);
292 292 }
293 293 }
294 294 path_state::COMLPTn => {
295 295 state = path_state::DEFAULT;
296 296 match src[i] {
297 297 b'.' | b'/' | b'\0' => {
298 298 escape3(dest, src[i - 2]);
299 299 dest.write_byte(src[i - 1]);
300 300 }
301 301 _ => {
302 302 dest.write_bytes(&src[i - 2..i]);
303 303 }
304 304 }
305 305 }
306 306 path_state::L => {
307 307 if src[i] == b'p' {
308 308 state = path_state::LP;
309 309 dest.write_byte(src[i]);
310 310 i += 1;
311 311 } else {
312 312 state = path_state::DEFAULT;
313 313 }
314 314 }
315 315 path_state::LP => {
316 316 if src[i] == b't' {
317 317 state = path_state::COMLPT;
318 318 i += 1;
319 319 } else {
320 320 state = path_state::DEFAULT;
321 321 }
322 322 }
323 323 path_state::N => {
324 324 if src[i] == b'u' {
325 325 state = path_state::NU;
326 326 dest.write_byte(src[i]);
327 327 i += 1;
328 328 } else {
329 329 state = path_state::DEFAULT;
330 330 }
331 331 }
332 332 path_state::NU => {
333 333 if src[i] == b'l' {
334 334 state = path_state::THIRD;
335 335 i += 1;
336 336 } else {
337 337 state = path_state::DEFAULT;
338 338 }
339 339 }
340 340 path_state::P => {
341 341 if src[i] == b'r' {
342 342 state = path_state::PR;
343 343 dest.write_byte(src[i]);
344 344 i += 1;
345 345 } else {
346 346 state = path_state::DEFAULT;
347 347 }
348 348 }
349 349 path_state::PR => {
350 350 if src[i] == b'n' {
351 351 state = path_state::THIRD;
352 352 i += 1;
353 353 } else {
354 354 state = path_state::DEFAULT;
355 355 }
356 356 }
357 357 path_state::LDOT => match src[i] {
358 358 b'd' | b'i' => {
359 359 state = path_state::HGDI;
360 360 dest.write_byte(src[i]);
361 361 i += 1;
362 362 }
363 363 b'h' => {
364 364 state = path_state::H;
365 365 dest.write_byte(src[i]);
366 366 i += 1;
367 367 }
368 368 _ => {
369 369 state = path_state::DEFAULT;
370 370 }
371 371 },
372 372 path_state::DOT => match src[i] {
373 373 b'/' | b'\0' => {
374 374 state = path_state::START;
375 375 dest.write_bytes(b"~2e");
376 376 dest.write_byte(src[i]);
377 377 i += 1;
378 378 }
379 379 b'd' | b'i' => {
380 380 state = path_state::HGDI;
381 381 dest.write_byte(b'.');
382 382 dest.write_byte(src[i]);
383 383 i += 1;
384 384 }
385 385 b'h' => {
386 386 state = path_state::H;
387 387 dest.write_bytes(b".h");
388 388 i += 1;
389 389 }
390 390 _ => {
391 391 state = path_state::DEFAULT;
392 392 dest.write_byte(b'.');
393 393 }
394 394 },
395 395 path_state::H => {
396 396 if src[i] == b'g' {
397 397 state = path_state::HGDI;
398 398 dest.write_byte(src[i]);
399 399 i += 1;
400 400 } else {
401 401 state = path_state::DEFAULT;
402 402 }
403 403 }
404 404 path_state::HGDI => {
405 405 if src[i] == b'/' {
406 406 state = path_state::START;
407 407 if encodedir {
408 408 dest.write_bytes(b".hg");
409 409 }
410 410 dest.write_byte(src[i]);
411 411 i += 1
412 412 } else {
413 413 state = path_state::DEFAULT;
414 414 }
415 415 }
416 416 path_state::SPACE => match src[i] {
417 417 b'/' | b'\0' => {
418 418 state = path_state::START;
419 419 dest.write_bytes(b"~20");
420 420 dest.write_byte(src[i]);
421 421 i += 1;
422 422 }
423 423 _ => {
424 424 state = path_state::DEFAULT;
425 425 dest.write_byte(b' ');
426 426 }
427 427 },
428 428 path_state::DEFAULT => {
429 429 while i != len && inset(onebyte, src[i]) {
430 430 dest.write_byte(src[i]);
431 431 i += 1;
432 432 }
433 433 if i == len {
434 434 break;
435 435 }
436 436 match src[i] {
437 437 b'.' => {
438 438 state = path_state::DOT;
439 439 i += 1
440 440 }
441 441 b' ' => {
442 442 state = path_state::SPACE;
443 443 i += 1
444 444 }
445 445 b'/' => {
446 446 state = path_state::START;
447 447 dest.write_byte(b'/');
448 448 i += 1;
449 449 }
450 450 _ => {
451 451 if inset(onebyte, src[i]) {
452 452 loop {
453 453 dest.write_byte(src[i]);
454 454 i += 1;
455 455 if !(i < len && inset(onebyte, src[i])) {
456 456 break;
457 457 }
458 458 }
459 459 } else if inset(twobytes, src[i]) {
460 460 let c = src[i];
461 461 i += 1;
462 462 dest.write_byte(b'_');
463 463 dest.write_byte(if c == b'_' {
464 464 b'_'
465 465 } else {
466 466 c + 32
467 467 });
468 468 } else {
469 469 escape3(dest, src[i]);
470 470 i += 1;
471 471 }
472 472 }
473 473 }
474 474 }
475 475 }
476 476 }
477 477 match state {
478 478 path_state::START => (),
479 479 path_state::A => (),
480 480 path_state::AU => (),
481 481 path_state::THIRD => escape3(dest, src[i - 1]),
482 482 path_state::C => (),
483 483 path_state::CO => (),
484 484 path_state::COMLPT => dest.write_byte(src[i - 1]),
485 485 path_state::COMLPTn => {
486 486 escape3(dest, src[i - 2]);
487 487 dest.write_byte(src[i - 1]);
488 488 }
489 489 path_state::L => (),
490 490 path_state::LP => (),
491 491 path_state::N => (),
492 492 path_state::NU => (),
493 493 path_state::P => (),
494 494 path_state::PR => (),
495 495 path_state::LDOT => (),
496 496 path_state::DOT => {
497 497 dest.write_bytes(b"~2e");
498 498 }
499 499 path_state::H => (),
500 500 path_state::HGDI => (),
501 501 path_state::SPACE => {
502 502 dest.write_bytes(b"~20");
503 503 }
504 504 path_state::DEFAULT => (),
505 505 }
506 506 }
507 507
508 508 fn basic_encode(dest: &mut impl Sink, src: &[u8]) {
509 509 let twobytes: [u32; 8] = [0, 0, 0x87ff_fffe, 0, 0, 0, 0, 0];
510 510 let onebyte: [u32; 8] =
511 511 [1, 0x2bff_3bfa, 0x6800_0001, 0x2fff_ffff, 0, 0, 0, 0];
512 512 _encode(&twobytes, &onebyte, dest, src, true)
513 513 }
514 514
/// Longest encoded path, in bytes, that `path_encode` returns without
/// switching to the hashed form; also the size of the buffer `hash_mangle`
/// writes into.
const MAXSTOREPATHLEN: usize = 120;
516 516
517 517 fn lower_encode(dest: &mut impl Sink, src: &[u8]) {
518 518 let onebyte: [u32; 8] =
519 519 [1, 0x2bff_fbfb, 0xe800_0001, 0x2fff_ffff, 0, 0, 0, 0];
520 520 let lower: [u32; 8] = [0, 0, 0x07ff_fffe, 0, 0, 0, 0, 0];
521 521 for c in src {
522 522 if inset(&onebyte, *c) {
523 523 dest.write_byte(*c)
524 524 } else if inset(&lower, *c) {
525 525 dest.write_byte(*c + 32)
526 526 } else {
527 527 escape3(dest, *c)
528 528 }
529 529 }
530 530 }
531 531
532 532 fn aux_encode(dest: &mut impl Sink, src: &[u8]) {
533 533 let twobytes = [0; 8];
534 534 let onebyte: [u32; 8] = [!0, 0xffff_3ffe, !0, !0, !0, !0, !0, !0];
535 535 _encode(&twobytes, &onebyte, dest, src, false)
536 536 }
537 537
538 538 fn hash_mangle(src: &[u8], sha: &[u8]) -> Vec<u8> {
539 539 let dirprefixlen = 8;
540 540 let maxshortdirslen = 68;
541 541
542 542 let last_slash = src.iter().rposition(|b| *b == b'/');
543 543 let last_dot: Option<usize> = {
544 544 let s = last_slash.unwrap_or(0);
545 545 src[s..].iter().rposition(|b| *b == b'.').map(|i| i + s)
546 546 };
547 547
548 548 let mut dest_vec = vec![0; MAXSTOREPATHLEN];
549 549 let mut dest = Dest::create(&mut dest_vec);
550 550 dest.write_bytes(b"dh/");
551 551
552 552 if let Some(last_slash) = last_slash {
553 553 for slice in src[..last_slash].split(|b| *b == b'/') {
554 554 let slice = &slice[..std::cmp::min(slice.len(), dirprefixlen)];
555 555 if dest.len + slice.len() > maxshortdirslen + 3 {
556 556 break;
557 557 } else {
558 558 dest.write_bytes(slice);
559 559 }
560 560 dest.write_byte(b'/');
561 561 }
562 562 }
563 563
564 564 let used = dest.len + 40 + {
565 565 if let Some(l) = last_dot {
566 566 src.len() - l
567 567 } else {
568 568 0
569 569 }
570 570 };
571 571
572 572 if MAXSTOREPATHLEN > used {
573 573 let slop = MAXSTOREPATHLEN - used;
574 574 let basenamelen = match last_slash {
575 575 Some(l) => src.len() - l - 1,
576 576 None => src.len(),
577 577 };
578 578 let basenamelen = std::cmp::min(basenamelen, slop);
579 579 if basenamelen > 0 {
580 580 let start = match last_slash {
581 581 Some(l) => l + 1,
582 582 None => 0,
583 583 };
584 584 dest.write_bytes(&src[start..][..basenamelen])
585 585 }
586 586 }
587 587 for c in sha {
588 588 hexencode(&mut dest, *c);
589 589 }
590 590 if let Some(l) = last_dot {
591 591 dest.write_bytes(&src[l..]);
592 592 }
593 593 let destlen = dest.len;
594 594 if destlen == dest_vec.len() {
595 595 dest_vec
596 596 } else {
597 597 // sometimes the path are shorter than MAXSTOREPATHLEN
598 598 dest_vec[..destlen].to_vec()
599 599 }
600 600 }
601 601
602 602 fn hash_encode(src: &[u8]) -> Vec<u8> {
603 let mut dired = DestArr::create();
604 let mut lowered = DestArr::create();
605 let mut auxed = DestArr::create();
603 let mut dired: DestArr<MAXENCODE> = DestArr::create();
604 let mut lowered: DestArr<MAXENCODE> = DestArr::create();
605 let mut auxed: DestArr<MAXENCODE> = DestArr::create();
606 606 let baselen = (src.len() - 5) * 3;
607 607 if baselen >= MAXENCODE {
608 608 panic!("path_encode::hash_encore: string too long: {}", baselen)
609 609 };
610 610 encode_dir(&mut dired, src);
611 611 let sha = Sha1::digest(dired.contents());
612 612 lower_encode(&mut lowered, &dired.contents()[5..]);
613 613 aux_encode(&mut auxed, lowered.contents());
614 614 hash_mangle(auxed.contents(), &sha)
615 615 }
616 616
617 617 pub fn path_encode(path: &[u8]) -> Vec<u8> {
618 618 let newlen = if path.len() <= MAXSTOREPATHLEN {
619 619 let mut measure = Dest::create_measure();
620 620 basic_encode(&mut measure, path);
621 621 measure.len
622 622 } else {
623 623 MAXSTOREPATHLEN + 1
624 624 };
625 625 if newlen <= MAXSTOREPATHLEN {
626 626 if newlen == path.len() {
627 627 path.to_vec()
628 628 } else {
629 629 let mut res = vec![0; newlen];
630 630 let mut dest = Dest::create(&mut res);
631 631 basic_encode(&mut dest, path);
632 632 assert!(dest.len == newlen);
633 633 res
634 634 }
635 635 } else {
636 636 hash_encode(path)
637 637 }
638 638 }
639 639
#[cfg(test)]
mod tests {
    use super::*;
    use crate::utils::hg_path::HgPathBuf;

    /// A path whose basic encoding exceeds `MAXSTOREPATHLEN` must come back
    /// in the hashed "dh/" form.
    #[test]
    fn test_long_filename_at_root() {
        let input = b"data/ABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJ.i";
        let expected = b"dh/abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij.i708243a2237a7afae259ea3545a72a2ef11c247b.i";
        let encoded = path_encode(input);
        assert_eq!(
            HgPathBuf::from_bytes(&encoded),
            HgPathBuf::from_bytes(expected)
        );
    }
}
General Comments 0
You need to be logged in to leave comments. Login now