search_hub

at 9ceb48b Raw

/// Percent-encode the UTF-8 bytes of `s`, form-style.
///
/// ASCII letters, ASCII digits and the characters `-`, `_`, `.`, `~` are
/// copied through unchanged. A space is written as `+`. Every other byte
/// (including each byte of a multi-byte UTF-8 sequence) is written as `%XX`
/// using two uppercase hexadecimal digits.
pub fn urlencode(s: &str) -> String {
    // Lookup table for the two nibbles of an escaped byte.
    const HEX_DIGITS: &[u8; 16] = b"0123456789ABCDEF";

    let mut encoded = String::with_capacity(s.len());
    for &b in s.as_bytes() {
        if b.is_ascii_alphanumeric() || matches!(b, b'-' | b'_' | b'.' | b'~') {
            encoded.push(char::from(b));
        } else if b == b' ' {
            encoded.push('+');
        } else {
            encoded.push('%');
            encoded.push(char::from(HEX_DIGITS[usize::from(b >> 4)]));
            encoded.push(char::from(HEX_DIGITS[usize::from(b & 0x0F)]));
        }
    }
    encoded
}

/// Decode common HTML character references in `s`.
///
/// Recognised references:
/// - named: `&amp;`, `&lt;`, `&gt;`, `&quot;`, `&apos;`
/// - decimal numeric: `&#39;`
/// - hexadecimal numeric: `&#x27;` / `&#X27;`
///
/// Anything else is copied to the output unchanged. In particular a bare `&`
/// that does not start a recognised, `;`-terminated reference is emitted as a
/// literal `&` and scanning resumes right after it, so text such as
/// `"AT&T &amp; co"` decodes to `"AT&T & co"` and no `;` is ever invented.
/// Numeric references whose value is not a valid Unicode scalar value are
/// also left as-is.
pub fn decode_html_entities(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut rest = s;

    while let Some(amp) = rest.find('&') {
        // Everything before the ampersand is plain text.
        out.push_str(&rest[..amp]);
        let after = &rest[amp + 1..];

        match parse_entity(after) {
            Some((decoded, consumed)) => {
                out.push(decoded);
                rest = &after[consumed..];
            }
            None => {
                // Not a reference we understand: keep the `&` literally and
                // continue with the very next character, so a later genuine
                // reference is still decoded.
                out.push('&');
                rest = after;
            }
        }
    }

    out.push_str(rest);
    out
}

/// Try to parse one character reference from `after`, the text immediately
/// following an `&`.
///
/// Returns the decoded character and the number of bytes of `after` that were
/// consumed (the reference body plus the terminating `;`), or `None` when
/// `after` does not begin with a supported reference.
fn parse_entity(after: &str) -> Option<(char, usize)> {
    // Bound the search for `;` so that input containing many stray `&`
    // characters is not rescanned to the end for each one.
    const MAX_ENTITY_LEN: usize = 32;

    // `;` is ASCII, so its byte offset is always a valid char boundary.
    let end = after
        .bytes()
        .take(MAX_ENTITY_LEN + 1)
        .position(|b| b == b';')?;
    let name = &after[..end];

    let decoded = match name {
        "amp" => '&',
        "lt" => '<',
        "gt" => '>',
        "quot" => '"',
        "apos" => '\'',
        _ => {
            let body = name.strip_prefix('#')?;
            let (digits, radix) = match body.strip_prefix(['x', 'X']) {
                Some(hex) => (hex, 16),
                None => (body, 10),
            };
            // Require plain digits: the integer parsers would otherwise
            // accept a leading `+`, which is not part of a numeric reference.
            if digits.is_empty() || !digits.chars().all(|d| d.is_digit(radix)) {
                return None;
            }
            char::from_u32(u32::from_str_radix(digits, radix).ok()?)?
        }
    };

    Some((decoded, end + 1))
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Plain ASCII letters pass through `urlencode` untouched.
    #[test]
    fn test_urlencode_plain() {
        let encoded = urlencode("hello");
        assert_eq!(encoded, "hello");
    }

    /// A space is rendered as `+`.
    #[test]
    fn test_urlencode_spaces() {
        let encoded = urlencode("hello world");
        assert_eq!(encoded, "hello+world");
    }

    /// Reserved characters are percent-escaped with uppercase hex.
    #[test]
    fn test_urlencode_special() {
        let encoded = urlencode("a&b/c");
        assert_eq!(encoded, "a%26b%2Fc");
    }

    /// Empty input yields empty output.
    #[test]
    fn test_urlencode_empty() {
        let encoded = urlencode("");
        assert_eq!(encoded, "");
    }

    /// Letters, digits and `-`, `_`, `~`, `.` are never escaped.
    #[test]
    fn test_urlencode_alphanum() {
        let encoded = urlencode("ABC123-_~.");
        assert_eq!(encoded, "ABC123-_~.");
    }

    /// `&amp;` decodes to a literal ampersand.
    #[test]
    fn test_decode_html_entities_amp() {
        let decoded = decode_html_entities("A &amp; B");
        assert_eq!(decoded, "A & B");
    }

    /// `&quot;` decodes to a double quote.
    #[test]
    fn test_decode_html_entities_quot() {
        let decoded = decode_html_entities("&quot;hello&quot;");
        assert_eq!(decoded, "\"hello\"");
    }

    /// A decimal numeric reference decodes to the matching character.
    #[test]
    fn test_decode_html_entities_numeric() {
        let decoded = decode_html_entities("&#39;hello&#39;");
        assert_eq!(decoded, "'hello'");
    }

    /// Text without any `&` is returned unchanged.
    #[test]
    fn test_decode_html_entities_no_entities() {
        let decoded = decode_html_entities("plain text");
        assert_eq!(decoded, "plain text");
    }
}