use figment::Figment; use figment::providers::{Format, Toml}; use serde::Deserialize; use std::path::PathBuf; use std::time::Duration; use async_trait::async_trait; use crate::search_engines::{EngineError, ResultEntry, SearchEngine}; /// Configuration for a single search engine instance. #[derive(Debug, Deserialize, Clone)] #[serde(tag = "type", rename_all = "lowercase")] pub enum EngineConfig { /// crates.io registry (public or private) #[serde(rename = "crates_io")] CratesIo { #[serde(default)] url: Option<String>, #[serde(default)] timeout_secs: Option<f32>, }, /// SearXNG meta-search engine SearXng { instance: String, #[serde(default)] timeout_secs: Option<f32>, }, /// Wikipedia search (language-specific) Wikipedia { /// Language code (e.g. "en", "fr", "de"). Defaults to "en" if omitted. #[serde(default)] lang: Option<String>, #[serde(default)] timeout_secs: Option<f32>, }, /// MDN Web Docs search (language-specific) Mdn { /// Locale code (e.g. "en-US", "fr", "de"). Defaults to "en-US" if omitted. #[serde(default)] locale: Option<String>, #[serde(default)] timeout_secs: Option<f32>, }, /// Generic HTML-scraped search engine (configurable URL + CSS selector) Generic { /// Display name for this engine (e.g. "DuckDuckGo", "Stack Overflow"). name: String, /// URL template with `{}` placeholder for the query. url: String, /// CSS selector targeting the result container. selector: String, #[serde(default)] timeout_secs: Option<f32>, }, } #[async_trait] impl SearchEngine for EngineConfig { fn id(&self) -> &str { match self { EngineConfig::CratesIo { .. } => "crates.io", EngineConfig::SearXng { .. } => "searxng", EngineConfig::Wikipedia { .. } => "wikipedia", EngineConfig::Mdn { .. } => "mdn", EngineConfig::Generic { .. } => "generic", } } fn name(&self) -> &str { match self { EngineConfig::CratesIo { .. } => "crates.io", EngineConfig::SearXng { .. } => "SearXNG", EngineConfig::Wikipedia { .. } => "Wikipedia", EngineConfig::Mdn { .. } => "MDN", EngineConfig::Generic { name, .. } => name, } } fn url_template(&self) -> &str { match self { EngineConfig::Generic { url, .. } => url, _ => "", } } fn selector(&self) -> &str { match self { EngineConfig::Generic { selector, .. } => selector, _ => "", } } fn timeout(&self) -> Duration { let secs = match self { EngineConfig::CratesIo { timeout_secs, .. } | EngineConfig::SearXng { timeout_secs, .. } | EngineConfig::Wikipedia { timeout_secs, .. } | EngineConfig::Mdn { timeout_secs, .. } | EngineConfig::Generic { timeout_secs, .. } => timeout_secs, }; secs.map(|s| Duration::from_secs_f32(s)).unwrap_or(Duration::from_secs(5)) } async fn fetch_results( &self, query: &str, client: &reqwest::Client, ) -> Result<Vec<ResultEntry>, EngineError> { match self { EngineConfig::CratesIo { url, timeout_secs } => { let engine = crate::search_engines::crates_io::CratesIo { timeout_secs: *timeout_secs, api_url: url.clone().unwrap_or_else(|| crate::search_engines::crates_io::DEFAULT_API_URL.into()), }; engine.fetch_results(query, client).await } EngineConfig::SearXng { instance, timeout_secs } => { let engine = crate::search_engines::searxng::SearXng { instance: instance.clone(), url_tpl: format!("{}/search?format=json&q={{}}", instance.trim_end_matches('/')), timeout_secs: *timeout_secs, }; engine.fetch_results(query, client).await } EngineConfig::Wikipedia { lang, timeout_secs } => { let engine = crate::search_engines::wikipedia::Wikipedia { lang: lang.clone().unwrap_or_else(|| crate::search_engines::wikipedia::DEFAULT_LANG.into()), timeout_secs: *timeout_secs, }; engine.fetch_results(query, client).await } EngineConfig::Mdn { locale, timeout_secs } => { let engine = crate::search_engines::mdn::Mdn { locale: locale.clone().unwrap_or_else(|| crate::search_engines::mdn::DEFAULT_LOCALE.into()), timeout_secs: *timeout_secs, }; engine.fetch_results(query, client).await } EngineConfig::Generic { name, url, selector, timeout_secs } => { let engine = crate::search_engines::generic::Generic { name: name.clone(), url: url.clone(), selector: selector.clone(), timeout_secs: *timeout_secs, }; engine.fetch_results(query, client).await } } } } /// Application configuration loaded from the TOML config file. /// /// # Example /// /// ```ignore /// let cfg = search_hub::config::Config::load(); /// let engines = cfg.engines.clone(); /// println!("{} engines enabled", engines.len()); /// ``` #[derive(Debug, Deserialize)] pub struct Config { /// Custom tag definitions. If non-empty, these replace the hardcoded defaults. #[serde(default)] pub tags: Vec<crate::tagging::TagDef>, /// Whether auto-tagging is enabled. Defaults to `false` if not set. #[serde(default)] pub tagging_enabled: Option<bool>, /// Tagging threshold (0.0 to 1.0). Defaults to 0.60 if not set. #[serde(default)] pub tagging_threshold: Option<f64>, /// Hostnames to exclude from content fetching. #[serde(default)] pub exclude_urls: Option<Vec<String>>, /// Per-engine configuration. Each entry defines an enabled search engine. #[serde(default)] pub engines: Vec<EngineConfig>, /// Default bookmark database path. #[serde(default)] pub db_path: Option<String>, /// Server bind address (default: "127.0.0.1"). #[serde(default)] pub bind_address: Option<String>, /// Results per page (default: 20). #[serde(default)] pub page_size: Option<usize>, /// Actix worker threads (default: 2). #[serde(default)] pub workers: Option<usize>, /// ONNX embedding model name (default: "BGESmallENV15"). #[serde(default)] pub onnx_model: Option<String>, /// Max characters to use from page content for tagging (default: 2000). #[serde(default)] pub truncation: Option<usize>, /// Max tags to assign per bookmark (default: 5). #[serde(default)] pub max_tags: Option<usize>, } impl Config { /// Load configuration from the default config file path. /// /// Returns a default (empty) `Config` if the file doesn't exist or can't be parsed. /// Parse errors are printed to stderr. /// /// # Example /// /// ```ignore /// let cfg = search_hub::config::Config::load(); /// ``` pub fn load() -> Self { Self::load_from(&config_file_path()) } /// Load configuration from a specific file path. /// /// Returns a default (empty) `Config` if the file doesn't exist or can't be parsed. /// Parse errors are printed to stderr. /// /// # Example /// /// ```ignore /// let cfg = search_hub::config::Config::load_from(&PathBuf::from("/tmp/test.toml")); /// ``` pub fn load_from(path: &PathBuf) -> Self { if path.exists() { Figment::new() .merge(Toml::file(path)) .extract() .unwrap_or_else(|e| { eprintln!("Warning: failed to parse config file {:?}: {}", path, e); Config::default() }) } else { Config::default() } } } impl Default for Config { fn default() -> Self { Config { tags: Vec::new(), tagging_enabled: None, tagging_threshold: None, exclude_urls: None, engines: Vec::new(), db_path: None, bind_address: None, page_size: None, workers: None, onnx_model: None, truncation: None, max_tags: None, } } } /// Return the expected config file path (e.g. `~/.config/search_hub/config.toml` on Linux). /// /// # Example /// /// ```ignore /// let path = search_hub::config::config_file_path(); /// ``` /// /// # Panics /// /// Panics if the platform has no valid config directory. pub fn config_file_path() -> PathBuf { let dirs = directories::ProjectDirs::from("com", "search_hub", "search_hub") .expect("no valid config directory"); let config_dir = dirs.config_dir(); config_dir.join("config.toml") } #[cfg(test)] mod tests { use super::*; use tempfile::NamedTempFile; use std::io::Write; #[test] fn load_from_missing_file_returns_default() { let cfg = Config::load_from(&PathBuf::from("/nonexistent/path.toml")); assert!(cfg.tags.is_empty()); assert!(cfg.engines.is_empty()); } #[test] fn load_from_valid_file_with_engines() { let mut file = NamedTempFile::new().unwrap(); write!(file, r#" [[engines]] type = "searxng" instance = "https://search.example.com" "#).unwrap(); let cfg = Config::load_from(&file.path().to_path_buf()); assert!(!cfg.engines.is_empty()); assert!(matches!(cfg.engines[0], EngineConfig::SearXng { .. })); if let EngineConfig::SearXng { instance, .. } = &cfg.engines[0] { assert_eq!(instance, "https://search.example.com"); } else { panic!("expected SearXng"); } } #[test] fn resolve_engines_includes_searxng_from_engines_vec() { let mut file = NamedTempFile::new().unwrap(); write!(file, r#" [[engines]] type = "searxng" instance = "https://search.example.com" "#).unwrap(); let cfg = Config::load_from(&file.path().to_path_buf()); let engines = cfg.engines.clone(); assert!(engines.iter().any(|e| e.id() == "searxng")); } #[test] fn resolve_engines_empty_by_default() { let cfg = Config::default(); assert!(cfg.engines.is_empty()); } #[test] fn resolve_engines_includes_crates_io_when_configured() { let mut file = NamedTempFile::new().unwrap(); write!(file, r#" [[engines]] type = "crates_io" "#).unwrap(); let cfg = Config::load_from(&file.path().to_path_buf()); let engines = cfg.engines.clone(); assert!(engines.iter().any(|e| e.id() == "crates.io")); } #[test] fn resolve_engines_includes_wikipedia_when_configured() { let mut file = NamedTempFile::new().unwrap(); write!(file, r#" [[engines]] type = "wikipedia" lang = "fr" "#).unwrap(); let cfg = Config::load_from(&file.path().to_path_buf()); let engines = cfg.engines.clone(); assert!(engines.iter().any(|e| e.id() == "wikipedia")); } #[test] fn resolve_engines_respects_multiple_engines() { let mut file = NamedTempFile::new().unwrap(); write!(file, r#" [[engines]] type = "crates_io" [[engines]] type = "searxng" instance = "https://search.example.com" "#).unwrap(); let cfg = Config::load_from(&file.path().to_path_buf()); let engines = cfg.engines.clone(); assert!(engines.iter().any(|e| e.id() == "crates.io")); assert!(engines.iter().any(|e| e.id() == "searxng")); } #[test] fn parse_error_returns_default() { let mut file = NamedTempFile::new().unwrap(); write!(file, "invalid toml [[[").unwrap(); let cfg = Config::load_from(&file.path().to_path_buf()); assert!(cfg.tags.is_empty()); assert!(cfg.engines.is_empty()); } #[test] fn tagging_enabled_defaults_to_false() { let cfg = Config::default(); assert_eq!(cfg.tagging_enabled.unwrap_or(false), false); } #[test] fn tagging_enabled_can_be_false() { let mut file = NamedTempFile::new().unwrap(); write!(file, r#"tagging_enabled = false"#).unwrap(); let cfg = Config::load_from(&file.path().to_path_buf()); assert_eq!(cfg.tagging_enabled, Some(false)); } #[test] fn tagging_enabled_can_be_true() { let mut file = NamedTempFile::new().unwrap(); write!(file, r#"tagging_enabled = true"#).unwrap(); let cfg = Config::load_from(&file.path().to_path_buf()); assert_eq!(cfg.tagging_enabled, Some(true)); } }