Commit
Message
Changed Files (6)
-
modified README.md
diff --git a/README.md b/README.md index dcef110..adb95d0 100644 --- a/README.md +++ b/README.md @@ -103,6 +103,12 @@ instance = "https://search.kael.ink" # type = "wikipedia" # lang = "fr" # timeout_secs = 5.0 + +# MDN Web Docs search (optional, defaults to en-US) +# [[engines]] +# type = "mdn" +# locale = "fr" +# timeout_secs = 5.0 ``` ## Run the web server as a systemd user service -
modified src/config.rs
diff --git a/src/config.rs b/src/config.rs index 2f395cc..4dcac3a 100644 --- a/src/config.rs +++ b/src/config.rs @@ -33,6 +33,14 @@ pub enum EngineConfig { #[serde(default)] timeout_secs: Option<f32>, }, + /// MDN Web Docs search (language-specific) + Mdn { + /// Locale code (e.g. "en-US", "fr", "de"). Defaults to "en-US" if omitted. + #[serde(default)] + locale: Option<String>, + #[serde(default)] + timeout_secs: Option<f32>, + }, } #[async_trait] @@ -42,6 +50,7 @@ impl SearchEngine for EngineConfig { EngineConfig::CratesIo { .. } => "crates.io", EngineConfig::SearXng { .. } => "searxng", EngineConfig::Wikipedia { .. } => "wikipedia", + EngineConfig::Mdn { .. } => "mdn", } } @@ -50,6 +59,7 @@ impl SearchEngine for EngineConfig { EngineConfig::CratesIo { .. } => "crates.io", EngineConfig::SearXng { .. } => "SearXNG", EngineConfig::Wikipedia { .. } => "Wikipedia", + EngineConfig::Mdn { .. } => "MDN", } } @@ -65,7 +75,8 @@ impl SearchEngine for EngineConfig { let secs = match self { EngineConfig::CratesIo { timeout_secs, .. } | EngineConfig::SearXng { timeout_secs, .. } - | EngineConfig::Wikipedia { timeout_secs, .. } => timeout_secs, + | EngineConfig::Wikipedia { timeout_secs, .. } + | EngineConfig::Mdn { timeout_secs, .. } => timeout_secs, }; secs.map(|s| Duration::from_secs_f32(s)).unwrap_or(Duration::from_secs(5)) } @@ -98,6 +109,13 @@ impl SearchEngine for EngineConfig { }; engine.fetch_results(query, client).await } + EngineConfig::Mdn { locale, timeout_secs } => { + let engine = crate::search_engines::mdn::Mdn { + locale: locale.clone().unwrap_or_else(|| crate::search_engines::mdn::DEFAULT_LOCALE.into()), + timeout_secs: *timeout_secs, + }; + engine.fetch_results(query, client).await + } } } } -
added src/search_engines/mdn.rs
diff --git a/src/search_engines/mdn.rs b/src/search_engines/mdn.rs new file mode 100644 index 0000000..91c27ba --- /dev/null +++ b/src/search_engines/mdn.rs @@ -0,0 +1,134 @@ +use async_trait::async_trait; +use serde::Deserialize; + +use crate::search_engines::{EngineError, ResultEntry, SearchEngine}; +use crate::search_engines::utils::urlencode; + +pub const DEFAULT_LOCALE: &str = "en-US"; + +pub struct Mdn { + pub locale: String, + pub timeout_secs: Option<f32>, +} + +#[derive(Deserialize)] +struct MdnResult { + title: String, + summary: Option<String>, + mdn_url: String, +} + +#[derive(Deserialize)] +struct MdnResponse { + documents: Vec<MdnResult>, +} + +#[async_trait] +impl SearchEngine for Mdn { + fn id(&self) -> &str { + "mdn" + } + + fn name(&self) -> &str { + "MDN" + } + + fn url_template(&self) -> &str { + "https://developer.mozilla.org/{locale}/docs/..." + } + + fn selector(&self) -> &str { + "" + } + + fn timeout(&self) -> std::time::Duration { + self.timeout_secs + .map(|s| std::time::Duration::from_secs_f32(s)) + .unwrap_or_else(|| std::time::Duration::from_secs(5)) + } + + async fn fetch_results( + &self, + query: &str, + client: &reqwest::Client, + ) -> Result<Vec<ResultEntry>, EngineError> { + let url = format!( + "https://developer.mozilla.org/api/v1/search?q={}&locale={}", + urlencode(query), self.locale + ); + let body = client + .get(&url) + .send() + .await + .map_err(|e| EngineError(format!("fetch failed: {e}")))? 
+ .text() + .await + .map_err(|e| EngineError(format!("read body failed: {e}")))?; + + let resp: MdnResponse = serde_json::from_str(&body) + .map_err(|e| EngineError(format!("JSON parse failed: {e}")))?; + + let results: Vec<ResultEntry> = resp + .documents + .into_iter() + .map(|d| { + let page_url = format!("https://developer.mozilla.org{}", d.mdn_url); + ResultEntry { + title: strip_mark_tags(&d.title), + url: page_url, + description: d.summary.map(|s| strip_mark_tags(&s)), + engine: format!("mdn.{}", self.locale), + } + }) + .collect(); + + if results.is_empty() { + Err(EngineError("no results found".into())) + } else { + Ok(results) + } + } +} + +fn strip_mark_tags(s: &str) -> String { + let s = s.replace("<mark>", ""); + s.replace("</mark>", "") +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::search_engines::SearchEngine; + + #[test] + fn test_id() { + let e = Mdn { locale: DEFAULT_LOCALE.into(), timeout_secs: None }; + assert_eq!(e.id(), "mdn"); + } + + #[test] + fn test_name() { + let e = Mdn { locale: DEFAULT_LOCALE.into(), timeout_secs: None }; + assert_eq!(e.name(), "MDN"); + } + + #[test] + fn test_selector() { + let e = Mdn { locale: DEFAULT_LOCALE.into(), timeout_secs: None }; + assert_eq!(e.selector(), ""); + } + + #[test] + fn test_engine_construct() { + let e = Mdn { locale: "fr".into(), timeout_secs: Some(3.0) }; + assert_eq!(e.locale, "fr"); + assert_eq!(e.timeout_secs, Some(3.0)); + } + + #[test] + fn test_strip_mark_tags() { + assert_eq!(strip_mark_tags("hello"), "hello"); + assert_eq!(strip_mark_tags("<mark>hello</mark>"), "hello"); + assert_eq!(strip_mark_tags("hello <mark>world</mark>"), "hello world"); + } +} -
modified src/search_engines/mod.rs
diff --git a/src/search_engines/mod.rs b/src/search_engines/mod.rs index 57a9c59..dad696f 100644 --- a/src/search_engines/mod.rs +++ b/src/search_engines/mod.rs @@ -1,4 +1,5 @@ pub mod crates_io; +pub mod mdn; pub mod searxng; pub mod utils; pub mod wikipedia; -
modified src/search_engines/utils.rs
diff --git a/src/search_engines/utils.rs b/src/search_engines/utils.rs index 5f59e45..e4d165d 100644 --- a/src/search_engines/utils.rs +++ b/src/search_engines/utils.rs @@ -14,6 +14,48 @@ pub fn urlencode(s: &str) -> String { out } +/// Decode common HTML entities (&amp;, &lt;, &gt;, &quot;, &apos;, &#...;). +pub fn decode_html_entities(s: &str) -> String { + let mut out = String::with_capacity(s.len()); + let mut chars = s.chars().peekable(); + while let Some(c) = chars.next() { + if c == '&' { + let mut entity = String::new(); + for c in chars.by_ref() { + if c == ';' { + break; + } + entity.push(c); + } + let decoded = match entity.as_str() { + "amp" => "&", + "lt" => "<", + "gt" => ">", + "quot" => "\"", + "apos" => "'", + _ => { + if entity.starts_with('#') { + if let Ok(code) = entity[1..].parse::<u32>() { + if let Some(c) = char::from_u32(code) { + out.push(c); + continue; + } + } + } + out.push('&'); + out.push_str(&entity); + out.push(';'); + continue; + } + }; + out.push_str(decoded); + } else { + out.push(c); + } + } + out +} + #[cfg(test)] mod tests { use super::*; @@ -42,4 +84,24 @@ mod tests { fn test_urlencode_alphanum() { assert_eq!(urlencode("ABC123-_~."), "ABC123-_~."); } + + #[test] + fn test_decode_html_entities_amp() { + assert_eq!(decode_html_entities("A &amp; B"), "A & B"); + } + + #[test] + fn test_decode_html_entities_quot() { + assert_eq!(decode_html_entities("&quot;hello&quot;"), "\"hello\""); + } + + #[test] + fn test_decode_html_entities_numeric() { + assert_eq!(decode_html_entities("&#39;hello&#39;"), "'hello'"); + } + + #[test] + fn test_decode_html_entities_no_entities() { + assert_eq!(decode_html_entities("plain text"), "plain text"); + } } -
modified src/search_engines/wikipedia.rs
diff --git a/src/search_engines/wikipedia.rs b/src/search_engines/wikipedia.rs index 5116c19..f69c251 100644 --- a/src/search_engines/wikipedia.rs +++ b/src/search_engines/wikipedia.rs @@ -83,7 +83,7 @@ impl SearchEngine for Wikipedia { .map(|r| { let page_url = format!("https://{}.wikipedia.org/wiki/{}", self.lang, - urlencode(&r.title), + urlencode(&r.title).replace('+', "_"), ); ResultEntry { title: r.title, @@ -91,9 +91,9 @@ impl SearchEngine for Wikipedia { description: r.snippet.map(|s| { let s = s.replace("<span class=\"searchmatch\">", ""); let s = s.replace("</span>", ""); - s + decode_html_entities(&s) }), - engine: "wikipedia".into(), + engine: format!("wikipedia.{}", self.lang), } }) .collect(); @@ -106,8 +106,8 @@ impl SearchEngine for Wikipedia { } } -// share urlencode from the utils module -use crate::search_engines::utils::urlencode; +// share utility functions from the utils module +use crate::search_engines::utils::{decode_html_entities, urlencode}; #[cfg(test)] mod tests { @@ -139,6 +139,24 @@ mod tests { assert_eq!(e.timeout_secs, Some(3.0)); } + #[test] + fn test_title_url_spaces_become_underscores() { + let title = "IRC chat"; + let url = format!("https://en.wikipedia.org/wiki/{}", + urlencode(title).replace('+', "_"), + ); + assert_eq!(url, "https://en.wikipedia.org/wiki/IRC_chat"); + } + + #[test] + fn test_title_url_plus_sign_preserved() { + let title = "C++"; + let url = format!("https://en.wikipedia.org/wiki/{}", + urlencode(title).replace('+', "_"), + ); + assert_eq!(url, "https://en.wikipedia.org/wiki/C%2B%2B"); + } + #[test] fn test_api_url_format() { assert_eq!(