1use std::{borrow::Cow, fmt::Display, string::FromUtf8Error};
30
/// Byte-indexed membership table for the characters that never need escaping.
///
/// `UNRESERVED[b as usize]` is `true` exactly when `b` is an ASCII letter, an
/// ASCII digit, or one of `-`, `.`, `_`, `~` (the set RFC 3986 calls
/// "unreserved"). Every other byte, including all bytes >= 0x80, maps to
/// `false`.
const UNRESERVED: [bool; 256] = {
    let mut flags = [false; 256];
    // `for` loops are not available in const evaluation, hence the manual counter.
    let mut idx = 0usize;
    while idx < 256 {
        // `idx` is always below 256 here, so the narrowing cast is lossless.
        flags[idx] = matches!(
            idx as u8,
            b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' | b'-' | b'.' | b'_' | b'~'
        );
        idx += 1;
    }
    flags
};
54
/// Byte-indexed table of two-character uppercase hexadecimal spellings.
///
/// `ENCODE_PAIR[b as usize]` holds the ASCII digits for the high nibble and
/// then the low nibble of `b`; for example the entry for `0x2F` is `b"2F"`.
const ENCODE_PAIR: [[u8; 2]; 256] = {
    const HEX_UPPER: [u8; 16] = *b"0123456789ABCDEF";
    let mut pairs = [[0u8; 2]; 256];
    // `for` loops are not available in const evaluation, hence the manual counter.
    let mut value = 0usize;
    while value < 256 {
        let high = HEX_UPPER[value / 16];
        let low = HEX_UPPER[value % 16];
        pairs[value] = [high, low];
        value += 1;
    }
    pairs
};
65
/// Byte-indexed table mapping an ASCII hex digit to its numeric value.
///
/// Entries for `0-9`, `a-f` and `A-F` hold `0..=15`; every other entry holds
/// the sentinel `0xFF`, whose high nibble is non-zero and therefore can never
/// be mistaken for a valid nibble.
const DECODE_NIBBLE: [u8; 256] = {
    let mut values = [0xFFu8; 256];
    // `for` loops are not available in const evaluation, hence the manual counter.
    let mut idx = 0usize;
    while idx < 256 {
        // `idx` is always below 256 here, so the narrowing cast is lossless.
        let byte = idx as u8;
        values[idx] = match byte {
            b'0'..=b'9' => byte - b'0',
            b'a'..=b'f' => byte - b'a' + 10,
            b'A'..=b'F' => byte - b'A' + 10,
            _ => 0xFF,
        };
        idx += 1;
    }
    values
};
82
83#[must_use]
94pub fn encode(input: &str) -> Cow<'_, str> {
95 match encode_bytes(input.as_bytes()) {
96 Cow::Borrowed(_) => Cow::Borrowed(input),
97 Cow::Owned(bytes) => Cow::Owned(String::from_utf8(bytes).expect("encoded output is ASCII")),
98 }
99}
100
101#[must_use]
106pub fn encode_bytes(input: &[u8]) -> Cow<'_, [u8]> {
107 let Some(first) = input.iter().position(|&b| !UNRESERVED[b as usize]) else {
108 return Cow::Borrowed(input);
109 };
110
111 let mut out = Vec::with_capacity(input.len() + input.len() / 2 + 16);
114 out.extend_from_slice(&input[..first]);
115
116 let mut rest = &input[first..];
117 while let Some(&byte) = rest.first() {
118 if UNRESERVED[byte as usize] {
119 let run_end = rest
120 .iter()
121 .position(|&b| !UNRESERVED[b as usize])
122 .unwrap_or(rest.len());
123 out.extend_from_slice(&rest[..run_end]);
124 rest = &rest[run_end..];
125 } else {
126 out.push(b'%');
127 out.extend_from_slice(&ENCODE_PAIR[byte as usize]);
128 rest = &rest[1..];
129 }
130 }
131 Cow::Owned(out)
132}
133
134pub fn decode(input: &str) -> Result<Cow<'_, str>, DecodeError> {
145 match decode_bytes(input.as_bytes()) {
146 Cow::Borrowed(_) => Ok(Cow::Borrowed(input)),
147 Cow::Owned(bytes) => String::from_utf8(bytes)
148 .map(Cow::Owned)
149 .map_err(DecodeError::InvalidUtf8),
150 }
151}
152
153#[must_use]
158pub fn decode_bytes(input: &[u8]) -> Cow<'_, [u8]> {
159 let Some(first) = input.iter().position(|&b| b == b'%') else {
160 return Cow::Borrowed(input);
161 };
162
163 let mut out = Vec::with_capacity(input.len());
164 out.extend_from_slice(&input[..first]);
165
166 let mut i = first;
167 while i < input.len() {
168 if input[i] == b'%' {
169 if i + 2 < input.len() {
170 let hi = DECODE_NIBBLE[input[i + 1] as usize];
171 let lo = DECODE_NIBBLE[input[i + 2] as usize];
172 if (hi | lo) & 0xF0 == 0 {
173 out.push((hi << 4) | lo);
174 i += 3;
175 continue;
176 }
177 }
178 out.push(b'%');
180 i += 1;
181 } else {
182 let run_start = i;
183 while i < input.len() && input[i] != b'%' {
184 i += 1;
185 }
186 out.extend_from_slice(&input[run_start..i]);
187 }
188 }
189 Cow::Owned(out)
190}
191
/// Error returned when percent-decoded data cannot be used as a string.
#[derive(Debug)]
pub enum DecodeError {
    /// The decoded bytes were not valid UTF-8; wraps the standard library's
    /// conversion error, which still owns the offending bytes.
    InvalidUtf8(FromUtf8Error),
}

impl Display for DecodeError {
    /// Writes a fixed prefix followed by the wrapped error's own message.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Irrefutable while the enum has a single variant; adding a variant
        // turns this into a compile error that forces the message to be revisited.
        let Self::InvalidUtf8(cause) = self;
        write!(f, "invalid UTF-8 in decoded bytes: {cause}")
    }
}

impl std::error::Error for DecodeError {
    /// Exposes the wrapped [`FromUtf8Error`] as the underlying cause.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        let Self::InvalidUtf8(cause) = self;
        Some(cause)
    }
}

impl From<FromUtf8Error> for DecodeError {
    /// Wraps a UTF-8 conversion failure so it can be propagated with `?`.
    fn from(cause: FromUtf8Error) -> Self {
        DecodeError::InvalidUtf8(cause)
    }
}
220
// Unit, table-driven and property tests for the percent-encoding functions.
#[cfg(test)]
mod tests {
    use rstest::rstest;

    use super::*;

    // Every character the encoder must pass through untouched.
    const UNRESERVED_CHARS: &str =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~";

    // URI delimiter characters; each one must come out as a `%XX` triple.
    const RESERVED_CHARS: &str = ":/?#[]@!$&'()*+,;=";

    // Fixed ASCII inputs paired with their expected encodings.
    #[rstest]
    #[case("", "")]
    #[case("abc", "abc")]
    #[case("ABC-xyz_0.9~", "ABC-xyz_0.9~")]
    #[case(" ", "%20")]
    #[case("+", "%2B")]
    #[case("/", "%2F")]
    #[case("?", "%3F")]
    #[case("#", "%23")]
    #[case("&", "%26")]
    #[case("=", "%3D")]
    #[case("%", "%25")]
    #[case("hello world", "hello%20world")]
    #[case("a b+c/d", "a%20b%2Bc%2Fd")]
    #[case("\x7f", "%7F")]
    fn test_encode_ascii_vectors(#[case] input: &str, #[case] expected: &str) {
        let actual = encode(input);
        assert_eq!(&*actual, expected);
    }

    // Unreserved input must come back identical and without allocating.
    #[rstest]
    fn test_encode_all_unreserved_unchanged() {
        let encoded = encode(UNRESERVED_CHARS);
        assert_eq!(&*encoded, UNRESERVED_CHARS);
        assert!(matches!(encoded, Cow::Borrowed(_)));
    }

    // Each reserved character expands to exactly three uppercase-hex bytes.
    #[rstest]
    fn test_encode_all_reserved_percent_encoded() {
        let encoded = encode(RESERVED_CHARS);
        assert_eq!(encoded.len(), 3 * RESERVED_CHARS.len());

        let is_allowed =
            |b: u8| b == b'%' || b.is_ascii_digit() || (b'A'..=b'F').contains(&b);
        if let Some(byte) = encoded.bytes().find(|&b| !is_allowed(b)) {
            panic!("unexpected byte {byte:#04x} in encoded reserved output");
        }
    }

    // Hex digits in escapes are emitted in uppercase.
    #[rstest]
    fn test_encode_hex_is_uppercase() {
        let encoded = encode("/");
        assert_eq!(&*encoded, "%2F");
        assert!(encoded.chars().all(|c| c != 'f'));
    }

    // Exhaustive single-byte check against the `UNRESERVED` table.
    #[rstest]
    fn test_encode_every_byte_position() {
        for byte in u8::MIN..=u8::MAX {
            let single = [byte];
            let encoded = encode_bytes(&single);

            if !UNRESERVED[usize::from(byte)] {
                let want = format!("%{byte:02X}");
                assert_eq!(&*encoded, want.as_bytes(), "byte {byte:#04x}");
                continue;
            }

            assert!(
                matches!(encoded, Cow::Borrowed(_)),
                "unreserved byte {byte:#04x} should not allocate"
            );
            assert_eq!(&*encoded, &single[..]);
        }
    }

    // Two-, three- and four-byte UTF-8 sequences are escaped byte by byte.
    #[rstest]
    fn test_encode_utf8_multibyte() {
        let vectors = [
            ("\u{00E9}", "%C3%A9"),
            ("\u{4E2D}", "%E4%B8%AD"),
            ("\u{1F600}", "%F0%9F%98%80"),
        ];
        for (input, expected) in vectors {
            assert_eq!(&*encode(input), expected);
        }
    }

    #[rstest]
    fn test_encode_mixed_ascii_and_utf8() {
        let encoded = encode("a é/");
        assert_eq!(&*encoded, "a%20%C3%A9%2F");
    }

    #[rstest]
    fn test_encode_returns_borrowed_when_no_work() {
        assert!(matches!(encode("safe-string_123.xyz~"), Cow::Borrowed(_)));
    }

    #[rstest]
    fn test_encode_returns_owned_when_encoding_needed() {
        assert!(matches!(encode("needs encoding"), Cow::Owned(_)));
    }

    // Well-formed escapes, in either hex case, decode to the expected text.
    #[rstest]
    #[case("", "")]
    #[case("abc", "abc")]
    #[case("%20", " ")]
    #[case("%2F", "/")]
    #[case("%2f", "/")]
    #[case("%2b", "+")]
    #[case("%25", "%")]
    #[case("hello%20world", "hello world")]
    #[case("a%20b%2Bc%2Fd", "a b+c/d")]
    #[case("%C3%A9", "\u{00E9}")]
    #[case("%E4%B8%AD", "\u{4E2D}")]
    #[case("%F0%9F%98%80", "\u{1F600}")]
    fn test_decode_ascii_and_utf8_vectors(#[case] input: &str, #[case] expected: &str) {
        let decoded = decode(input).unwrap();
        assert_eq!(&*decoded, expected);
    }

    // A `%` without two hex digits after it is kept as a literal.
    #[rstest]
    #[case("%", "%")]
    #[case("%2", "%2")]
    #[case("%GG", "%GG")]
    #[case("%2G", "%2G")]
    #[case("%G2", "%G2")]
    #[case("%%20", "% ")]
    #[case("100%", "100%")]
    fn test_decode_malformed_percent_passes_through(
        #[case] input: &str,
        #[case] expected: &str,
    ) {
        let decoded = decode(input).unwrap();
        assert_eq!(&*decoded, expected);
    }

    #[rstest]
    fn test_decode_returns_borrowed_when_no_percent() {
        let decoded = decode("no-percent-here").unwrap();
        assert!(matches!(decoded, Cow::Borrowed(_)));
    }

    #[rstest]
    fn test_decode_returns_owned_when_percent_present() {
        let decoded = decode("a%20b").unwrap();
        assert!(matches!(decoded, Cow::Owned(_)));
    }

    // A malformed escape must not prevent later valid escapes from decoding.
    #[rstest]
    #[case("this%2x%26that", "this%2x&that")]
    #[case("%%25", "%%")]
    #[case("%2%26", "%2&")]
    #[case("a%2Zb%20c", "a%2Zb c")]
    fn test_decode_malformed_then_valid(#[case] input: &str, #[case] expected: &str) {
        let decoded = decode(input).unwrap();
        assert_eq!(&*decoded, expected);
    }

    // `%FF` decodes to a lone 0xFF byte, which is not valid UTF-8.
    #[rstest]
    fn test_decode_invalid_utf8_errors() {
        let outcome = decode("%FF");
        assert!(matches!(outcome, Err(DecodeError::InvalidUtf8(_))));
    }

    // The byte-level API has no UTF-8 requirement, so the same input succeeds.
    #[rstest]
    fn test_decode_invalid_utf8_bytes_ok() {
        let decoded = decode_bytes(b"%FF");
        assert_eq!(&*decoded, &[0xFFu8][..]);
    }

    #[rstest]
    fn test_decode_consecutive_percent_triples() {
        let decoded = decode("%e2%98%83").unwrap();
        assert_eq!(&*decoded, "\u{2603}");
    }

    #[rstest]
    fn test_decode_nul_byte() {
        let decoded = decode("a%00b").unwrap();
        assert_eq!(decoded.as_bytes(), &b"a\0b"[..]);
    }

    // Encoding then decoding any single byte must reproduce it.
    #[rstest]
    fn test_roundtrip_every_byte() {
        for byte in u8::MIN..=u8::MAX {
            let original = [byte];
            let encoded = encode_bytes(&original);
            let decoded = decode_bytes(&encoded);
            assert_eq!(
                &*decoded,
                &original[..],
                "round-trip failed for byte {byte:#04x}"
            );
        }
    }

    #[rstest]
    #[case("hello")]
    #[case("a b c")]
    #[case("https://example.com/path?q=1&x=2")]
    #[case("\u{00E9}\u{00E0}\u{00FC}")]
    #[case("\u{4E2D}\u{6587}\u{6D4B}\u{8BD5}")]
    #[case("mix 123 !@# %^&*()")]
    #[case("\u{1F600}\u{1F680}\u{1F3C6}")]
    fn test_roundtrip_string(#[case] input: &str) {
        let encoded = encode(input);
        let decoded = decode(&encoded).unwrap();
        assert_eq!(&*decoded, input);
    }

    #[rstest]
    fn test_encoded_output_only_ascii() {
        let encoded = encode("\u{00E9}\u{4E2D}\u{1F600}");
        assert!(
            encoded.bytes().all(|b| b.is_ascii()),
            "encoded output must be ASCII-only"
        );
    }

    // All 256 byte values in one buffer: output is ASCII and round-trips.
    #[rstest]
    fn test_encode_bytes_arbitrary_binary() {
        let every_byte: Vec<u8> = (u8::MIN..=u8::MAX).collect();
        let encoded = encode_bytes(&every_byte);
        assert!(encoded.is_ascii());
        let decoded = decode_bytes(&encoded);
        assert_eq!(&*decoded, &every_byte[..]);
    }

    #[rstest]
    fn test_decode_error_display_and_source() {
        let err = decode("%FF").unwrap_err();
        let rendered = format!("{err}");
        assert!(rendered.starts_with("invalid UTF-8"), "got: {rendered}");
        let cause = std::error::Error::source(&err);
        assert!(cause.is_some());
    }

    // Deliberately naive encoder used as the oracle for the property tests;
    // it shares no tables with the implementation under test.
    fn reference_encode(input: &[u8]) -> Vec<u8> {
        let mut out = Vec::with_capacity(input.len());
        for &byte in input {
            match byte {
                b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'.' | b'_' | b'~' => {
                    out.push(byte);
                }
                _ => out.extend_from_slice(format!("%{byte:02X}").as_bytes()),
            }
        }
        out
    }

    // Deliberately naive decoder used as the oracle for the property tests.
    fn reference_decode(input: &[u8]) -> Vec<u8> {
        // Numeric value of an ASCII hex digit, or `None` for any other byte.
        fn hex_value(digit: u8) -> Option<u8> {
            (digit as char).to_digit(16).map(|v| v as u8)
        }

        let mut out = Vec::with_capacity(input.len());
        let mut pos = 0;
        while pos < input.len() {
            // `Some(byte)` only for a `%` followed by two hex digits.
            let escaped = if input[pos] == b'%' {
                match input.get(pos + 1..pos + 3) {
                    Some(&[hi, lo]) => hex_value(hi)
                        .zip(hex_value(lo))
                        .map(|(h, l)| (h << 4) | l),
                    _ => None,
                }
            } else {
                None
            };

            match escaped {
                Some(byte) => {
                    out.push(byte);
                    pos += 3;
                }
                None => {
                    out.push(input[pos]);
                    pos += 1;
                }
            }
        }
        out
    }

    proptest::proptest! {
        #[rstest]
        fn prop_encode_matches_reference(input: Vec<u8>) {
            let expected = reference_encode(&input);
            let actual = encode_bytes(&input);
            proptest::prop_assert_eq!(&*actual, &expected[..]);
        }

        #[rstest]
        fn prop_decode_matches_reference(input: Vec<u8>) {
            let expected = reference_decode(&input);
            let actual = decode_bytes(&input);
            proptest::prop_assert_eq!(&*actual, &expected[..]);
        }

        #[rstest]
        fn prop_bytes_roundtrip(input: Vec<u8>) {
            let encoded = encode_bytes(&input);
            let decoded = decode_bytes(&encoded);
            proptest::prop_assert_eq!(&*decoded, &input[..]);
        }

        #[rstest]
        fn prop_string_roundtrip(input: String) {
            let encoded = encode(&input);
            let decoded = decode(&encoded).unwrap();
            proptest::prop_assert_eq!(&*decoded, &input[..]);
        }

        #[rstest]
        fn prop_encoded_output_ascii(input: Vec<u8>) {
            let encoded = encode_bytes(&input);
            proptest::prop_assert!(encoded.is_ascii());
        }
    }
}