##// END OF EJS Templates
rhg: demonstrate a bug in path_encode...
Arseniy Alekseyev -
r50990:362fe347 default
parent child Browse files
Show More
@@ -1,645 +1,664 b''
1 1 use sha1::{Digest, Sha1};
2 2
/* States of the byte-by-byte scanner in `_encode`. Besides tracking the
 * position inside a path component, the states recognise the lowercase
 * prefixes "aux", "con", "com1".."com9", "lpt1".."lpt9", "nul" and "prn"
 * at the start of a component, and the ".hg", ".d" and ".i" suffixes. */
3 3 #[derive(PartialEq, Debug)]
4 4 #[allow(non_camel_case_types)]
5 5 #[allow(clippy::upper_case_acronyms)]
6 6 enum path_state {
7 7 START, /* first byte of a path component */
8 8 A, /* "AUX" */
/* "au" seen; an 'x' next moves to THIRD */
9 9 AU,
10 10 THIRD, /* third of a 3-byte sequence, e.g. "AUX", "NUL" */
11 11 C, /* "CON" or "COMn" */
/* "co" seen; 'm' moves to COMLPT, 'n' moves to THIRD */
9 9 AU,
12 12 CO,
13 13 COMLPT, /* "COM" or "LPT" */
/* "com" or "lpt" followed by a digit '1'..'9' */
14 14 COMLPTn,
/* "l" seen; a 'p' next moves to LP */
15 15 L,
/* "lp" seen; a 't' next moves to COMLPT */
16 16 LP,
/* "n" seen; a 'u' next moves to NU */
17 17 N,
/* "nu" seen; an 'l' next moves to THIRD */
18 18 NU,
19 19 P, /* "PRN" */
/* "pr" seen; an 'n' next moves to THIRD */
20 20 PR,
21 21 LDOT, /* leading '.' */
22 22 DOT, /* '.' in a non-leading position */
23 23 H, /* ".h" */
24 24 HGDI, /* ".hg", ".d", or ".i" */
/* ' ' in a non-leading position; escaped only if it ends the component */
25 25 SPACE,
26 26 DEFAULT, /* byte of a path component after the first */
27 27 }
28 28
29 29 /* state machine for dir-encoding */
/* Used by `encode_dir` to spot a ".hg", ".d" or ".i" suffix that is
 * immediately followed by '/'. */
30 30 #[allow(non_camel_case_types)]
31 31 #[allow(clippy::upper_case_acronyms)]
32 32 enum dir_state {
/* a '.' was just copied */
33 33 DDOT,
/* ".h" was just copied */
34 34 DH,
/* ".hg", ".d" or ".i" was just copied */
35 35 DHGDI,
/* any other position */
36 36 DDEFAULT,
37 37 }
38 38
/// Tests whether byte `c` is a member of the 256-bit set `bitset`.
///
/// Bit `c % 32` of word `c / 32` holds the membership flag for `c`.
fn inset(bitset: &[u32; 8], c: u8) -> bool {
    let word = bitset[usize::from(c / 32)];
    let mask = 1u32 << (c % 32);
    (word & mask) != 0
}
42 42
/// Appends the single byte `c` at offset `*destlen` and advances `*destlen`.
///
/// With `dest == None` nothing is written and only the length is advanced,
/// which lets callers measure the output size before allocating.
/// Panics if a buffer is given and `*destlen` is outside of it.
fn charcopy(dest: Option<&mut [u8]>, destlen: &mut usize, c: u8) {
    let at = *destlen;
    match dest {
        Some(buf) => buf[at] = c,
        None => {}
    }
    *destlen = at + 1;
}
49 49
/// Appends all of `src` at offset `*destlen` and advances `*destlen` by
/// `src.len()`.
///
/// With `dest == None` nothing is written and only the length is advanced.
/// Panics if a buffer is given and the bytes do not fit in it.
fn memcopy(dest: Option<&mut [u8]>, destlen: &mut usize, src: &[u8]) {
    let start = *destlen;
    let end = start + src.len();
    if let Some(buf) = dest {
        buf[start..end].copy_from_slice(src);
    }
    *destlen = end;
}
56 56
/// Reborrows the optional output buffer for a shorter lifetime.
///
/// This lets one `Option<&mut [u8]>` be handed to several helper calls in
/// sequence without giving up ownership of the original option.
fn rewrap_option<'a, 'b: 'a>(
    x: &'a mut Option<&'b mut [u8]>,
) -> Option<&'a mut [u8]> {
    x.as_deref_mut()
}
65 65
66 66 fn hexencode(mut dest: Option<&mut [u8]>, destlen: &mut usize, c: u8) {
67 67 let hexdigit = b"0123456789abcdef";
68 68 charcopy(
69 69 rewrap_option(&mut dest),
70 70 destlen,
71 71 hexdigit[(c as usize) >> 4],
72 72 );
73 73 charcopy(dest, destlen, hexdigit[(c as usize) & 15]);
74 74 }
75 75
76 76 /* 3-byte escape: tilde followed by two hex digits */
77 77 fn escape3(mut dest: Option<&mut [u8]>, destlen: &mut usize, c: u8) {
78 78 charcopy(rewrap_option(&mut dest), destlen, b'~');
79 79 hexencode(dest, destlen, c);
80 80 }
81 81
82 82 fn encode_dir(mut dest: Option<&mut [u8]>, src: &[u8]) -> usize {
83 83 let mut state = dir_state::DDEFAULT;
84 84 let mut i = 0;
85 85 let mut destlen = 0;
86 86
87 87 while i < src.len() {
88 88 match state {
89 89 dir_state::DDOT => match src[i] {
90 90 b'd' | b'i' => {
91 91 state = dir_state::DHGDI;
92 92 charcopy(rewrap_option(&mut dest), &mut destlen, src[i]);
93 93 i += 1;
94 94 }
95 95 b'h' => {
96 96 state = dir_state::DH;
97 97 charcopy(rewrap_option(&mut dest), &mut destlen, src[i]);
98 98 i += 1;
99 99 }
100 100 _ => {
101 101 state = dir_state::DDEFAULT;
102 102 }
103 103 },
104 104 dir_state::DH => {
105 105 if src[i] == b'g' {
106 106 state = dir_state::DHGDI;
107 107 charcopy(rewrap_option(&mut dest), &mut destlen, src[i]);
108 108 i += 1;
109 109 } else {
110 110 state = dir_state::DDEFAULT;
111 111 }
112 112 }
113 113 dir_state::DHGDI => {
114 114 if src[i] == b'/' {
115 115 memcopy(rewrap_option(&mut dest), &mut destlen, b".hg");
116 116 charcopy(rewrap_option(&mut dest), &mut destlen, src[i]);
117 117 i += 1;
118 118 }
119 119 state = dir_state::DDEFAULT;
120 120 }
121 121 dir_state::DDEFAULT => {
122 122 if src[i] == b'.' {
123 123 state = dir_state::DDOT
124 124 }
125 125 charcopy(rewrap_option(&mut dest), &mut destlen, src[i]);
126 126 i += 1;
127 127 }
128 128 }
129 129 }
130 130 destlen
131 131 }
132 132
/// Core byte-by-byte encoder shared by `basic_encode` and `aux_encode`.
///
/// Walks `src` through the `path_state` machine and emits the encoded form
/// with `charcopy` / `memcopy` / `escape3`:
///
/// * bytes in the `onebyte` set are copied unchanged;
/// * bytes in the `twobytes` set become `_` followed by the byte plus 32
///   (a `_` is doubled);
/// * any other byte becomes `~` plus two hex digits;
/// * a leading `.` or space of a component is always escaped, a `.` or space
///   in another position only when it ends the component;
/// * for the lowercase names "aux", "con", "nul", "prn", "com1".."com9" and
///   "lpt1".."lpt9" at the start of a component, the third letter is escaped
///   when the name is followed by `.`, `/`, NUL or the end of input;
/// * when `encodedir` is true, a `/` reached in the HGDI state (".hg", ".d"
///   or ".i" just seen) is preceded by an extra `.hg`.
///
/// `dest` may be `None`, in which case nothing is written and only the
/// length is computed. Returns the number of bytes of the encoded form.
133 133 fn _encode(
134 134 twobytes: &[u32; 8],
135 135 onebyte: &[u32; 8],
136 136 mut dest: Option<&mut [u8]>,
137 137 src: &[u8],
138 138 encodedir: bool,
139 139 ) -> usize {
140 140 let mut state = path_state::START;
141 141 let mut i = 0;
142 142 let mut destlen = 0;
143 143 let len = src.len();
144 144
145 145 while i < len {
146 146 match state {
147 147 path_state::START => match src[i] {
148 148 b'/' => {
149 149 charcopy(rewrap_option(&mut dest), &mut destlen, src[i]);
150 150 i += 1;
151 151 }
152 152 b'.' => {
153 153 state = path_state::LDOT;
154 154 escape3(rewrap_option(&mut dest), &mut destlen, src[i]);
155 155 i += 1;
156 156 }
157 157 b' ' => {
158 158 state = path_state::DEFAULT;
159 159 escape3(rewrap_option(&mut dest), &mut destlen, src[i]);
160 160 i += 1;
161 161 }
162 162 b'a' => {
163 163 state = path_state::A;
164 164 charcopy(rewrap_option(&mut dest), &mut destlen, src[i]);
165 165 i += 1;
166 166 }
167 167 b'c' => {
168 168 state = path_state::C;
169 169 charcopy(rewrap_option(&mut dest), &mut destlen, src[i]);
170 170 i += 1;
171 171 }
172 172 b'l' => {
173 173 state = path_state::L;
174 174 charcopy(rewrap_option(&mut dest), &mut destlen, src[i]);
175 175 i += 1;
176 176 }
177 177 b'n' => {
178 178 state = path_state::N;
179 179 charcopy(rewrap_option(&mut dest), &mut destlen, src[i]);
180 180 i += 1;
181 181 }
182 182 b'p' => {
183 183 state = path_state::P;
184 184 charcopy(rewrap_option(&mut dest), &mut destlen, src[i]);
185 185 i += 1;
186 186 }
// Any other first byte: do not consume it, let DEFAULT handle it.
187 187 _ => {
188 188 state = path_state::DEFAULT;
189 189 }
190 190 },
191 191 path_state::A => {
192 192 if src[i] == b'u' {
193 193 state = path_state::AU;
194 194 charcopy(rewrap_option(&mut dest), &mut destlen, src[i]);
195 195 i += 1;
196 196 } else {
197 197 state = path_state::DEFAULT;
198 198 }
199 199 }
200 200 path_state::AU => {
// The third letter is consumed but NOT emitted yet; THIRD (or the final
// match after the loop) decides whether it is escaped or copied.
201 201 if src[i] == b'x' {
202 202 state = path_state::THIRD;
203 203 i += 1;
204 204 } else {
205 205 state = path_state::DEFAULT;
206 206 }
207 207 }
208 208 path_state::THIRD => {
209 209 state = path_state::DEFAULT;
210 210 match src[i] {
// The 3-letter name ends here: escape its withheld third letter.
211 211 b'.' | b'/' | b'\0' => escape3(
212 212 rewrap_option(&mut dest),
213 213 &mut destlen,
214 214 src[i - 1],
215 215 ),
// The name continues: step back so DEFAULT re-reads the withheld letter.
216 216 _ => i -= 1,
217 217 }
218 218 }
219 219 path_state::C => {
220 220 if src[i] == b'o' {
221 221 state = path_state::CO;
222 222 charcopy(rewrap_option(&mut dest), &mut destlen, src[i]);
223 223 i += 1;
224 224 } else {
225 225 state = path_state::DEFAULT;
226 226 }
227 227 }
228 228 path_state::CO => {
// As in AU, the third letter ('m' or 'n') is consumed without being emitted.
229 229 if src[i] == b'm' {
230 230 state = path_state::COMLPT;
231 231 i += 1;
232 232 } else if src[i] == b'n' {
233 233 state = path_state::THIRD;
234 234 i += 1;
235 235 } else {
236 236 state = path_state::DEFAULT;
237 237 }
238 238 }
239 239 path_state::COMLPT => {
240 240 if src[i] >= b'1' && src[i] <= b'9' {
// The digit is withheld as well; two bytes are now pending.
241 241 state = path_state::COMLPTn;
242 242 i += 1;
243 243 } else {
// Not a device name: emit the withheld third letter unchanged.
244 244 state = path_state::DEFAULT;
245 245 charcopy(
246 246 rewrap_option(&mut dest),
247 247 &mut destlen,
248 248 src[i - 1],
249 249 );
250 250 }
251 251 }
252 252 path_state::COMLPTn => {
253 253 state = path_state::DEFAULT;
254 254 match src[i] {
// The name ends after the digit: escape the third letter, copy the digit.
255 255 b'.' | b'/' | b'\0' => {
256 256 escape3(
257 257 rewrap_option(&mut dest),
258 258 &mut destlen,
259 259 src[i - 2],
260 260 );
261 261 charcopy(
262 262 rewrap_option(&mut dest),
263 263 &mut destlen,
264 264 src[i - 1],
265 265 );
266 266 }
// The name continues: emit both withheld bytes unchanged.
267 267 _ => {
268 268 memcopy(
269 269 rewrap_option(&mut dest),
270 270 &mut destlen,
271 271 &src[i - 2..i],
272 272 );
273 273 }
274 274 }
275 275 }
276 276 path_state::L => {
277 277 if src[i] == b'p' {
278 278 state = path_state::LP;
279 279 charcopy(rewrap_option(&mut dest), &mut destlen, src[i]);
280 280 i += 1;
281 281 } else {
282 282 state = path_state::DEFAULT;
283 283 }
284 284 }
285 285 path_state::LP => {
286 286 if src[i] == b't' {
287 287 state = path_state::COMLPT;
288 288 i += 1;
289 289 } else {
290 290 state = path_state::DEFAULT;
291 291 }
292 292 }
293 293 path_state::N => {
294 294 if src[i] == b'u' {
295 295 state = path_state::NU;
296 296 charcopy(rewrap_option(&mut dest), &mut destlen, src[i]);
297 297 i += 1;
298 298 } else {
299 299 state = path_state::DEFAULT;
300 300 }
301 301 }
302 302 path_state::NU => {
303 303 if src[i] == b'l' {
304 304 state = path_state::THIRD;
305 305 i += 1;
306 306 } else {
307 307 state = path_state::DEFAULT;
308 308 }
309 309 }
310 310 path_state::P => {
311 311 if src[i] == b'r' {
312 312 state = path_state::PR;
313 313 charcopy(rewrap_option(&mut dest), &mut destlen, src[i]);
314 314 i += 1;
315 315 } else {
316 316 state = path_state::DEFAULT;
317 317 }
318 318 }
319 319 path_state::PR => {
320 320 if src[i] == b'n' {
321 321 state = path_state::THIRD;
322 322 i += 1;
323 323 } else {
324 324 state = path_state::DEFAULT;
325 325 }
326 326 }
// A leading '.' has already been emitted (escaped) by START.
327 327 path_state::LDOT => match src[i] {
328 328 b'd' | b'i' => {
329 329 state = path_state::HGDI;
330 330 charcopy(rewrap_option(&mut dest), &mut destlen, src[i]);
331 331 i += 1;
332 332 }
333 333 b'h' => {
334 334 state = path_state::H;
335 335 charcopy(rewrap_option(&mut dest), &mut destlen, src[i]);
336 336 i += 1;
337 337 }
338 338 _ => {
339 339 state = path_state::DEFAULT;
340 340 }
341 341 },
// A non-leading '.' was consumed by DEFAULT but not emitted yet.
342 342 path_state::DOT => match src[i] {
// The '.' ends the component: emit it escaped.
343 343 b'/' | b'\0' => {
344 344 state = path_state::START;
345 345 memcopy(rewrap_option(&mut dest), &mut destlen, b"~2e");
346 346 charcopy(rewrap_option(&mut dest), &mut destlen, src[i]);
347 347 i += 1;
348 348 }
349 349 b'd' | b'i' => {
350 350 state = path_state::HGDI;
351 351 charcopy(rewrap_option(&mut dest), &mut destlen, b'.');
352 352 charcopy(rewrap_option(&mut dest), &mut destlen, src[i]);
353 353 i += 1;
354 354 }
355 355 b'h' => {
356 356 state = path_state::H;
357 357 memcopy(rewrap_option(&mut dest), &mut destlen, b".h");
358 358 i += 1;
359 359 }
// Ordinary '.': emit it as-is and let DEFAULT handle the current byte.
360 360 _ => {
361 361 state = path_state::DEFAULT;
362 362 charcopy(rewrap_option(&mut dest), &mut destlen, b'.');
363 363 }
364 364 },
365 365 path_state::H => {
366 366 if src[i] == b'g' {
367 367 state = path_state::HGDI;
368 368 charcopy(rewrap_option(&mut dest), &mut destlen, src[i]);
369 369 i += 1;
370 370 } else {
371 371 state = path_state::DEFAULT;
372 372 }
373 373 }
374 374 path_state::HGDI => {
375 375 if src[i] == b'/' {
376 376 state = path_state::START;
// Only with `encodedir`: append ".hg" before the slash.
377 377 if encodedir {
378 378 memcopy(
379 379 rewrap_option(&mut dest),
380 380 &mut destlen,
381 381 b".hg",
382 382 );
383 383 }
384 384 charcopy(rewrap_option(&mut dest), &mut destlen, src[i]);
385 385 i += 1
386 386 } else {
387 387 state = path_state::DEFAULT;
388 388 }
389 389 }
// A non-leading ' ' was consumed by DEFAULT but not emitted yet.
390 390 path_state::SPACE => match src[i] {
391 391 b'/' | b'\0' => {
392 392 state = path_state::START;
393 393 memcopy(rewrap_option(&mut dest), &mut destlen, b"~20");
394 394 charcopy(rewrap_option(&mut dest), &mut destlen, src[i]);
395 395 i += 1;
396 396 }
397 397 _ => {
398 398 state = path_state::DEFAULT;
399 399 charcopy(rewrap_option(&mut dest), &mut destlen, b' ');
400 400 }
401 401 },
402 402 path_state::DEFAULT => {
// Fast path: copy the run of bytes that need no encoding.
403 403 while i != len && inset(onebyte, src[i]) {
404 404 charcopy(rewrap_option(&mut dest), &mut destlen, src[i]);
405 405 i += 1;
406 406 }
407 407 if i == len {
408 408 break;
409 409 }
410 410 match src[i] {
// '.' and ' ' are consumed but withheld until the next byte is known.
411 411 b'.' => {
412 412 state = path_state::DOT;
413 413 i += 1
414 414 }
415 415 b' ' => {
416 416 state = path_state::SPACE;
417 417 i += 1
418 418 }
419 419 b'/' => {
420 420 state = path_state::START;
421 421 charcopy(rewrap_option(&mut dest), &mut destlen, b'/');
422 422 i += 1;
423 423 }
424 424 _ => {
// NOTE(review): the loop above already consumed every `onebyte` byte, so
// this first branch looks unreachable here - confirm before removing.
425 425 if inset(onebyte, src[i]) {
426 426 loop {
427 427 charcopy(
428 428 rewrap_option(&mut dest),
429 429 &mut destlen,
430 430 src[i],
431 431 );
432 432 i += 1;
433 433 if !(i < len && inset(onebyte, src[i])) {
434 434 break;
435 435 }
436 436 }
437 437 } else if inset(twobytes, src[i]) {
// Two-byte form: '_' followed by the byte plus 32, or "__" for '_'.
438 438 let c = src[i];
439 439 i += 1;
440 440 charcopy(
441 441 rewrap_option(&mut dest),
442 442 &mut destlen,
443 443 b'_',
444 444 );
445 445 charcopy(
446 446 rewrap_option(&mut dest),
447 447 &mut destlen,
448 448 if c == b'_' { b'_' } else { c + 32 },
449 449 );
450 450 } else {
451 451 escape3(
452 452 rewrap_option(&mut dest),
453 453 &mut destlen,
454 454 src[i],
455 455 );
456 456 i += 1;
457 457 }
458 458 }
459 459 }
460 460 }
461 461 }
462 462 }
// End of input: flush the bytes still withheld by the final state. Reaching
// the end is handled like the '.', '/' and NUL terminators inside the loop,
// so a pending third letter, '.' or ' ' is emitted in its escaped form.
463 463 match state {
464 464 path_state::START => (),
465 465 path_state::A => (),
466 466 path_state::AU => (),
467 467 path_state::THIRD => {
468 468 escape3(rewrap_option(&mut dest), &mut destlen, src[i - 1])
469 469 }
470 470 path_state::C => (),
471 471 path_state::CO => (),
472 472 path_state::COMLPT => {
473 473 charcopy(rewrap_option(&mut dest), &mut destlen, src[i - 1])
474 474 }
475 475 path_state::COMLPTn => {
476 476 escape3(rewrap_option(&mut dest), &mut destlen, src[i - 2]);
477 477 charcopy(rewrap_option(&mut dest), &mut destlen, src[i - 1]);
478 478 }
479 479 path_state::L => (),
480 480 path_state::LP => (),
481 481 path_state::N => (),
482 482 path_state::NU => (),
483 483 path_state::P => (),
484 484 path_state::PR => (),
485 485 path_state::LDOT => (),
486 486 path_state::DOT => {
487 487 memcopy(rewrap_option(&mut dest), &mut destlen, b"~2e");
488 488 }
489 489 path_state::H => (),
490 490 path_state::HGDI => (),
491 491 path_state::SPACE => {
492 492 memcopy(rewrap_option(&mut dest), &mut destlen, b"~20");
493 493 }
494 494 path_state::DEFAULT => (),
495 495 };
496 496 destlen
497 497 }
498 498
499 499 fn basic_encode(dest: Option<&mut [u8]>, src: &[u8]) -> usize {
500 500 let twobytes: [u32; 8] = [0, 0, 0x87ff_fffe, 0, 0, 0, 0, 0];
501 501 let onebyte: [u32; 8] =
502 502 [1, 0x2bff_3bfa, 0x6800_0001, 0x2fff_ffff, 0, 0, 0, 0];
503 503 _encode(&twobytes, &onebyte, dest, src, true)
504 504 }
505 505
/// Longest encoded path, in bytes, that `path_encode` returns in its plain
/// `basic_encode` form; anything longer goes through `hash_encode`.
/// `hash_mangle` also uses it as the size budget of the hashed name.
506 506 const MAXSTOREPATHLEN: usize = 120;
507 507
508 508 fn lower_encode(mut dest: Option<&mut [u8]>, src: &[u8]) -> usize {
509 509 let onebyte: [u32; 8] =
510 510 [1, 0x2bff_fbfb, 0xe800_0001, 0x2fff_ffff, 0, 0, 0, 0];
511 511 let lower: [u32; 8] = [0, 0, 0x07ff_fffe, 0, 0, 0, 0, 0];
512 512 let mut destlen = 0;
513 513 for c in src {
514 514 if inset(&onebyte, *c) {
515 515 charcopy(rewrap_option(&mut dest), &mut destlen, *c)
516 516 } else if inset(&lower, *c) {
517 517 charcopy(rewrap_option(&mut dest), &mut destlen, *c + 32)
518 518 } else {
519 519 escape3(rewrap_option(&mut dest), &mut destlen, *c)
520 520 }
521 521 }
522 522 destlen
523 523 }
524 524
525 525 fn aux_encode(dest: Option<&mut [u8]>, src: &[u8]) -> usize {
526 526 let twobytes = [0; 8];
527 527 let onebyte: [u32; 8] = [!0, 0xffff_3ffe, !0, !0, !0, !0, !0, !0];
528 528 _encode(&twobytes, &onebyte, dest, src, false)
529 529 }
530 530
531 531 fn hash_mangle(src: &[u8], sha: &[u8]) -> Vec<u8> {
532 532 let dirprefixlen = 8;
533 533 let maxshortdirslen = 68;
534 534 let mut destlen = 0;
535 535
536 536 let last_slash = src.iter().rposition(|b| *b == b'/');
537 537 let last_dot: Option<usize> = {
538 538 let s = last_slash.unwrap_or(0);
539 539 src[s..].iter().rposition(|b| *b == b'.').map(|i| i + s)
540 540 };
541 541
542 542 let mut dest = vec![0; MAXSTOREPATHLEN];
543 543 memcopy(Some(&mut dest), &mut destlen, b"dh/");
544 544
545 545 {
546 546 let mut first = true;
547 547 for slice in
548 548 src[..last_slash.unwrap_or(src.len())].split(|b| *b == b'/')
549 549 {
550 550 let slice = &slice[..std::cmp::min(slice.len(), dirprefixlen)];
551 551 if destlen + (slice.len() + if first { 0 } else { 1 })
552 552 > maxshortdirslen + 3
553 553 {
554 554 break;
555 555 } else {
556 556 if !first {
557 557 charcopy(Some(&mut dest), &mut destlen, b'/')
558 558 };
559 559 memcopy(Some(&mut dest), &mut destlen, slice);
560 560 if dest[destlen - 1] == b'.' || dest[destlen - 1] == b' ' {
561 561 dest[destlen - 1] = b'_'
562 562 }
563 563 }
564 564 first = false;
565 565 }
566 566 if !first {
567 567 charcopy(Some(&mut dest), &mut destlen, b'/');
568 568 }
569 569 }
570 570
571 571 let used = destlen + 40 + {
572 572 if let Some(l) = last_dot {
573 573 src.len() - l
574 574 } else {
575 575 0
576 576 }
577 577 };
578 578
579 579 if MAXSTOREPATHLEN > used {
580 580 let slop = MAXSTOREPATHLEN - used;
581 581 let basenamelen = match last_slash {
582 582 Some(l) => src.len() - l - 1,
583 583 None => src.len(),
584 584 };
585 585 let basenamelen = std::cmp::min(basenamelen, slop);
586 586 if basenamelen > 0 {
587 587 let start = match last_slash {
588 588 Some(l) => l + 1,
589 589 None => 0,
590 590 };
591 591 memcopy(
592 592 Some(&mut dest),
593 593 &mut destlen,
594 594 &src[start..][..basenamelen],
595 595 )
596 596 }
597 597 }
598 598 for c in sha {
599 599 hexencode(Some(&mut dest), &mut destlen, *c);
600 600 }
601 601 if let Some(l) = last_dot {
602 602 memcopy(Some(&mut dest), &mut destlen, &src[l..]);
603 603 }
604 604 if destlen == dest.len() {
605 605 dest
606 606 } else {
607 607 // sometimes the path are shorter than MAXSTOREPATHLEN
608 608 dest[..destlen].to_vec()
609 609 }
610 610 }
611 611
612 612 const MAXENCODE: usize = 4096 * 4;
613 613 fn hash_encode(src: &[u8]) -> Vec<u8> {
614 614 let dired = &mut [0; MAXENCODE];
615 615 let lowered = &mut [0; MAXENCODE];
616 616 let auxed = &mut [0; MAXENCODE];
617 617 let baselen = (src.len() - 5) * 3;
618 618 if baselen >= MAXENCODE {
619 619 panic!("path_encode::hash_encore: string too long: {}", baselen)
620 620 };
621 621 let dirlen = encode_dir(Some(&mut dired[..]), src);
622 622 let sha = Sha1::digest(&dired[..dirlen]);
623 623 let lowerlen = lower_encode(Some(&mut lowered[..]), &dired[..dirlen][5..]);
624 624 let auxlen = aux_encode(Some(&mut auxed[..]), &lowered[..lowerlen]);
625 625 hash_mangle(&auxed[..auxlen], &sha)
626 626 }
627 627
628 628 pub fn path_encode(path: &[u8]) -> Vec<u8> {
629 629 let newlen = if path.len() <= MAXSTOREPATHLEN {
630 630 basic_encode(None, path)
631 631 } else {
632 632 MAXSTOREPATHLEN + 1
633 633 };
634 634 if newlen <= MAXSTOREPATHLEN {
635 635 if newlen == path.len() {
636 636 path.to_vec()
637 637 } else {
638 638 let mut res = vec![0; newlen];
639 639 basic_encode(Some(&mut res), path);
640 640 res
641 641 }
642 642 } else {
643 643 hash_encode(path)
644 644 }
645 645 }
646
647 #[cfg(test)]
648 mod tests {
649 use super::*;
650 use crate::utils::hg_path::HgPathBuf;
651
652 // expected failure
653 #[test]
654 #[should_panic]
655 fn test_long_filename_at_root() {
656 let input = b"data/ABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJABCDEFGHIJ.i";
657 let expected = b"dh/abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij.i708243a2237a7afae259ea3545a72a2ef11c247b.i";
658 let res = path_encode(input);
659 assert_eq!(
660 HgPathBuf::from_bytes(&res),
661 HgPathBuf::from_bytes(expected)
662 );
663 }
664 }
General Comments 0
You need to be logged in to leave comments. Login now