use std::sync::OnceLock;

/// Process-wide Tokio runtime shared by every test in this file.
static RT: OnceLock<tokio::runtime::Runtime> = OnceLock::new();

/// Returns the shared runtime, creating it on first use.
///
/// # Panics
/// Panics if the runtime cannot be created.
fn rt() -> &'static tokio::runtime::Runtime {
    RT.get_or_init(|| {
        tokio::runtime::Runtime::new().expect("failed to start the Tokio runtime for tests")
    })
}

/// Performs a blocking HTTP GET of `url` and returns the response body as text.
///
/// The HTTP client is built once and reused by later calls. Requests are
/// bounded by a timeout so a stalled connection fails the test instead of
/// hanging the whole run, and non-success HTTP statuses are treated as
/// failures so an error page is never handed to the converter by mistake.
///
/// # Panics
/// Panics (naming the URL) if the client cannot be built, the request fails or
/// times out, the server answers with a 4xx/5xx status, or the body cannot be
/// read as text.
fn fetch(url: &str) -> String {
    // Function-local so the cache adds no new top-level name to the file.
    static CLIENT: OnceLock<reqwest::Client> = OnceLock::new();

    rt().block_on(async {
        // Initialised inside the runtime context, exactly where the original
        // built its per-call client.
        let client = CLIENT.get_or_init(|| {
            reqwest::Client::builder()
                .user_agent("search_hub_test")
                .timeout(std::time::Duration::from_secs(30))
                .build()
                .expect("failed to build the HTTP client")
        });

        let resp = client
            .get(url)
            .send()
            .await
            .and_then(|r| r.error_for_status())
            .unwrap_or_else(|e| panic!("GET {url} failed: {e}"));

        resp.text()
            .await
            .unwrap_or_else(|e| panic!("reading the body of {url} failed: {e}"))
    })
}

/// Prints a size comparison between `html` and `md`, followed by the Markdown
/// itself, under a header labelled `name`.
///
/// Output goes to stdout, so it is only visible when the test harness is run
/// with `--nocapture` (or when a test fails).
fn print_md(name: &str, html: &str, md: &str) {
    // `max(1)` keeps the ratio finite when the conversion yields an empty string.
    let ratio = html.len() as f64 / md.len().max(1) as f64;

    println!();
    println!("=== {name} ===");
    println!("Raw HTML : {} bytes", html.len());
    println!("Markdown : {} bytes", md.len());
    println!("Ratio : {ratio:.1}x smaller");
    println!();
    println!("--- Markdown output ---");
    println!("{md}");
    println!("--- end ---");
}

/// Converting example.com must remove HTML markup while keeping the visible
/// heading text. Requires network access.
#[test]
fn strips_html_tags_and_preserves_text_example() {
    let html = fetch("https://example.com");
    assert!(
        html.contains("<h1>"),
        "expected HTML to contain tags before conversion"
    );

    let md = htmd::convert(&html).expect("conversion should succeed");
    print_md("example.com", &html, &md);

    assert!(!md.contains("<h1>"), "no HTML heading tags");
    assert!(!md.contains("<a "), "no HTML anchor tags");
    assert!(!md.contains("<div"), "no HTML div tags");
    assert!(!md.contains("</"), "no closing HTML tags");
    assert!(md.contains("Example Domain"), "visible heading text preserved");
    // Counts every line that starts with '#', i.e. headings of any level.
    assert_eq!(
        md.lines().filter(|l| l.starts_with('#')).count(),
        1,
        "exactly one H1 in Markdown"
    );
}

/// Converting rust-lang.org must drop scripts, styles and attribute syntax
/// while keeping the main heading. Requires network access.
#[test]
fn strips_html_tags_and_preserves_text_rustlang() {
    let html = fetch("https://www.rust-lang.org");
    assert!(
        html.contains("<html") || html.contains("<!DOCTYPE"),
        "expected valid HTML"
    );

    let md = htmd::convert(&html).expect("conversion should succeed");
    print_md("rust-lang.org", &html, &md);

    assert!(!md.contains("<script"), "no script tags in output");
    assert!(!md.contains("<style"), "no style tags in output");
    assert!(!md.contains("class=\""), "no HTML attribute syntax in output");
    assert!(!md.contains("id=\""), "no HTML id attributes in output");
    assert!(md.contains("Rust"), "page title preserved in Markdown");
    assert!(
        md.lines().any(|l| l.starts_with("# Rust")),
        "heading preserved as Markdown H1"
    );
    assert!(
        md.len() < html.len(),
        "Markdown smaller than raw HTML ({} vs {})",
        md.len(),
        html.len()
    );
}

/// The Markdown for example.com must contain a minimum amount of readable
/// content. Requires network access.
#[test]
fn markdown_output_is_readable() {
    let html = fetch("https://example.com");
    let md = htmd::convert(&html).expect("conversion should succeed");

    // Only the counts matter, so count directly instead of collecting.
    let non_empty_lines = md.lines().filter(|l| !l.trim().is_empty()).count();
    assert!(non_empty_lines >= 3, "at least 3 non-empty lines of content");

    let word_count = md.split_whitespace().count();
    assert!(word_count >= 20, "at least 20 readable words in output");
}