search_hub

at 9ceb48b Raw

use async_trait::async_trait;
use serde::Deserialize;

use crate::search_engines::{EngineError, ResultEntry, SearchEngine};

/// Configuration for a search backend that queries a SearXNG instance and
/// reads its JSON response.
///
/// Derives `Debug` and `Clone` so the configuration can be logged and
/// duplicated by callers.
#[derive(Debug, Clone)]
pub struct SearXng {
    /// Identifier of the instance; the tests in this file pass the base URL
    /// (e.g. `https://example.com`).
    pub instance: String,
    /// Search URL template handed back by `url_template()`. The tests in this
    /// file use a `{}` placeholder where the query goes.
    pub url_tpl: String,
    /// Optional request timeout in seconds. When `None`, `timeout()` falls
    /// back to five seconds.
    pub timeout_secs: Option<f32>,
}

/// One entry of the `results` array in a SearXNG JSON response.
///
/// Every field is optional so that a single incomplete entry does not make
/// the whole response fail to deserialize; `fetch_results` decides which
/// entries are usable.
#[derive(Deserialize)]
struct SearXngResult {
    /// Result title; entries with a missing or empty title are dropped.
    title: Option<String>,
    /// Target URL; entries without one are dropped.
    url: Option<String>,
    /// Snippet text, forwarded as the entry's `description`.
    content: Option<String>,
    /// Name of the engine reported for this hit; `fetch_results` substitutes
    /// `"searxng"` when absent.
    engine: Option<String>,
}

/// Top-level shape of the JSON body this engine parses.
///
/// Only the `results` key is modelled here. The field has no serde default,
/// so a body without a `results` key fails to deserialize.
#[derive(Deserialize)]
struct SearXngResponse {
    /// Raw result entries, in the order they appear in the response.
    results: Vec<SearXngResult>,
}

#[async_trait]
impl SearchEngine for SearXng {
    fn id(&self) -> &str {
        "searxng"
    }

    fn name(&self) -> &str {
        "SearXNG"
    }

    fn url_template(&self) -> &str {
        &self.url_tpl
    }

    fn selector(&self) -> &str {
        ""
    }

    fn timeout(&self) -> std::time::Duration {
        self.timeout_secs
            .map(std::time::Duration::from_secs_f32)
            .unwrap_or_else(|| std::time::Duration::from_secs(5))
    }

    async fn fetch_results(
        &self,
        query: &str,
        client: &reqwest::Client,
    ) -> Result<Vec<ResultEntry>, EngineError> {
        let url = self.search_url(query);
        let body = client
            .get(&url)
            .header("Accept", "application/json")
            .send()
            .await
            .map_err(|e| EngineError(format!("searxng fetch failed: {e}")))?
            .text()
            .await
            .map_err(|e| EngineError(format!("searxng read body failed: {e}")))?;

        let resp: SearXngResponse = serde_json::from_str(&body)
            .map_err(|e| EngineError(format!("searxng JSON parse failed: {e}")))?;

        let results: Vec<ResultEntry> = resp
            .results
            .into_iter()
            .filter_map(|r| {
                let title = r.title.unwrap_or_default();
                let url = r.url?;
                if title.is_empty() {
                    return None;
                }
                Some(ResultEntry {
                    title,
                    url,
                    description: r.content,
                    engine: r.engine.unwrap_or_else(|| "searxng".into()),
                })
            })
            .collect();

        if results.is_empty() {
            Err(EngineError("no results found".into()))
        } else {
            Ok(results)
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::search_engines::SearchEngine;

    /// Builds an engine for the instance at `base` with the JSON search
    /// template and no explicit timeout.
    fn engine_at(base: &str) -> SearXng {
        SearXng {
            instance: base.to_owned(),
            // `{{}}` renders as the literal `{}` query placeholder.
            url_tpl: format!("{base}/search?format=json&q={{}}"),
            timeout_secs: None,
        }
    }

    #[test]
    fn test_id() {
        let engine = engine_at("https://example.com");
        assert_eq!(engine.id(), "searxng");
    }

    #[test]
    fn test_name() {
        let engine = engine_at("https://example.com");
        assert_eq!(engine.name(), "SearXNG");
    }

    #[test]
    fn test_selector() {
        let engine = engine_at("https://example.com");
        assert_eq!(engine.selector(), "");
    }

    #[test]
    fn test_url_template_returns_from_struct() {
        let engine = engine_at("https://my-instance.net");
        let expected = "https://my-instance.net/search?format=json&q={}";
        assert_eq!(engine.url_template(), expected);
    }

    #[test]
    fn test_search_url_uses_template() {
        let engine = engine_at("https://example.com");
        let expected = "https://example.com/search?format=json&q=tokio";
        assert_eq!(engine.search_url("tokio"), expected);
    }
}