use async_trait::async_trait; use serde::Deserialize; use crate::search_engines::{EngineError, ResultEntry, SearchEngine}; pub const DEFAULT_LANG: &str = "en"; pub struct Wikipedia { pub lang: String, pub timeout_secs: Option<f32>, } fn api_url(lang: &str) -> String { format!("https://{lang}.wikipedia.org/w/api.php?action=query&list=search&srsearch={{}}&format=json&srlimit=10") } #[derive(Deserialize)] struct SearchResult { title: String, snippet: Option<String>, } #[derive(Deserialize)] struct Query { search: Vec<SearchResult>, } #[derive(Deserialize)] struct ApiResponse { query: Option<Query>, } #[async_trait] impl SearchEngine for Wikipedia { fn id(&self) -> &str { "wikipedia" } fn name(&self) -> &str { "Wikipedia" } fn url_template(&self) -> &str { // dynamic, so we construct at build time // this is only a best-effort display "https://{lang}.wikipedia.org/w/index.php?search={}" } fn selector(&self) -> &str { "" } fn timeout(&self) -> std::time::Duration { self.timeout_secs .map(std::time::Duration::from_secs_f32) .unwrap_or_else(|| std::time::Duration::from_secs(5)) } async fn fetch_results( &self, query: &str, client: &reqwest::Client, ) -> Result<Vec<ResultEntry>, EngineError> { let url = api_url(&self.lang).replace("{}", &urlencode(query)); let body = client .get(&url) .header("Accept", "application/json") .send() .await .map_err(|e| EngineError(format!("fetch failed: {e}")))? .text() .await .map_err(|e| EngineError(format!("read body failed: {e}")))?; let resp: ApiResponse = serde_json::from_str(&body) .map_err(|e| EngineError(format!("JSON parse failed: {e}")))?; let results: Vec<ResultEntry> = resp .query .map(|q| q.search) .unwrap_or_default() .into_iter() .map(|r| { let page_url = format!("https://{}.wikipedia.org/wiki/{}", self.lang, urlencode(&r.title).replace('+', "_"), ); ResultEntry { title: r.title, url: page_url, description: r.snippet.map(|s| { let s = s.replace("<span class=\"searchmatch\">", ""); let s = s.replace("</span>", ""); decode_html_entities(&s) }), engine: format!("wikipedia.{}", self.lang), } }) .collect(); if results.is_empty() { Err(EngineError("no results found".into())) } else { Ok(results) } } } // share utility functions from the utils module use crate::search_engines::utils::{decode_html_entities, urlencode}; #[cfg(test)] mod tests { use super::*; use crate::search_engines::SearchEngine; #[test] fn test_id() { let e = Wikipedia { lang: DEFAULT_LANG.into(), timeout_secs: None }; assert_eq!(e.id(), "wikipedia"); } #[test] fn test_name() { let e = Wikipedia { lang: DEFAULT_LANG.into(), timeout_secs: None }; assert_eq!(e.name(), "Wikipedia"); } #[test] fn test_selector() { let e = Wikipedia { lang: DEFAULT_LANG.into(), timeout_secs: None }; assert_eq!(e.selector(), ""); } #[test] fn test_engine_construct() { let e = Wikipedia { lang: "fr".into(), timeout_secs: Some(3.0) }; assert_eq!(e.lang, "fr"); assert_eq!(e.timeout_secs, Some(3.0)); } #[test] fn test_title_url_spaces_become_underscores() { let title = "IRC chat"; let url = format!("https://en.wikipedia.org/wiki/{}", urlencode(title).replace('+', "_"), ); assert_eq!(url, "https://en.wikipedia.org/wiki/IRC_chat"); } #[test] fn test_title_url_plus_sign_preserved() { let title = "C++"; let url = format!("https://en.wikipedia.org/wiki/{}", urlencode(title).replace('+', "_"), ); assert_eq!(url, "https://en.wikipedia.org/wiki/C%2B%2B"); } #[test] fn test_api_url_format() { assert_eq!( api_url("en"), "https://en.wikipedia.org/w/api.php?action=query&list=search&srsearch={}&format=json&srlimit=10" ); assert_eq!( api_url("fr"), "https://fr.wikipedia.org/w/api.php?action=query&list=search&srsearch={}&format=json&srlimit=10" ); } }