// search_hub
//
// at 4551d56 Raw

use figment::Figment;
use figment::providers::{Format, Toml};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::path::PathBuf;

/// A single search engine definition with an optional CSS selector for
/// inline result extraction.
///
/// When `selector` is `Some`, the search handler uses `scraper` to find
/// that container in the engine's HTML and extract `<a>` links from it.
/// Engines without a selector are skipped for inline extraction.
///
/// The type derives `Serialize` and `Deserialize`; on deserialization the
/// `selector` key is optional (see the field documentation).
///
/// # Example
///
/// ```rust
/// use search_hub::config::ForwarderDef;
///
/// let ddg = ForwarderDef {
///     id: "duckduckgo".into(),
///     name: "DuckDuckGo".into(),
///     url: "https://duckduckgo.com/?q={}".into(),
///     selector: Some("article[data-testid='result']".into()),
/// };
/// assert_eq!(ddg.id, "duckduckgo");
/// ```
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ForwarderDef {
    /// URL query parameter identifier (e.g. "duckduckgo").
    pub id: String,
    /// Display name (e.g. "DuckDuckGo").
    pub name: String,
    /// URL template with `{}` placeholder for the query string.
    pub url: String,
    /// CSS selector for the result container in the engine's HTML page.
    /// Used for inline result extraction; `None` skips inline extraction.
    ///
    /// Marked `#[serde(default)]`, so the key may be left out of serialized
    /// input, in which case it deserializes to `None`.
    #[serde(default)]
    pub selector: Option<String>,
}

/// Application configuration loaded from the TOML config file.
///
/// Supports `[[tags]]`, `enabled_engines`, `tagging_enabled`,
/// `tagging_threshold`, `exclude_urls`, `[engines.*]`, and `db_path`.
///
/// Every field is marked `#[serde(default)]`, so any key may be omitted from
/// the file: an omitted key deserializes to an empty `Vec` (for `tags`) or
/// to `None` (for every other field).
///
/// # Example
///
/// ```ignore
/// let cfg = search_hub::config::Config::load();
/// if cfg.tags.is_empty() {
///     println!("using default tags");
/// }
/// let engines = cfg.resolve_engines();
/// println!("{} engines enabled", engines.len());
/// ```
#[derive(Debug, Deserialize)]
pub struct Config {
    /// Custom tag definitions. If non-empty, these replace the hardcoded defaults.
    #[serde(default)]
    pub tags: Vec<crate::tagging::TagDef>,
    /// List of engine IDs to enable for inline search results.
    /// If `None`, no filtering is applied: `resolve_engines` returns every
    /// engine from `search_engines::default_search_engines()` plus every
    /// supported engine configured under `engines`.
    #[serde(default)]
    pub enabled_engines: Option<Vec<String>>,
    /// Whether auto-tagging is enabled. The field is `None` when the key is
    /// absent; consumers are expected to treat that as `false`.
    #[serde(default)]
    pub tagging_enabled: Option<bool>,
    /// Tagging threshold (0.0 to 1.0). Tags with a score below this are
    /// discarded. Defaults to 0.60 if not set; the field itself is `None`
    /// in that case and the fallback is applied by the consumer, not by
    /// this struct.
    #[serde(default)]
    pub tagging_threshold: Option<f64>,
    /// Hostnames to exclude from content fetching during import.
    /// Defaults to localhost addresses if not set; the field itself is
    /// `None` in that case and the fallback is applied by the consumer,
    /// not by this struct.
    #[serde(default)]
    pub exclude_urls: Option<Vec<String>>,
    /// Per-engine configuration, keyed by engine ID.
    /// For example: `[engines.searxng]` with `instance = "https://..."`.
    /// Each value is kept as a raw TOML table and interpreted later by the
    /// engine it belongs to.
    #[serde(default)]
    pub engines: Option<HashMap<String, toml::Table>>,
    /// Default bookmark database path. Overrides the platform default.
    #[serde(default)]
    pub db_path: Option<String>,
}

impl Config {
    /// Load configuration from the default config file path.
    ///
    /// Returns a default (empty) `Config` if the file doesn't exist or can't be parsed.
    /// Parse errors are printed to stderr.
    ///
    /// # Example
    ///
    /// ```ignore
    /// let cfg = search_hub::config::Config::load();
    /// ```
    pub fn load() -> Self {
        Self::load_from(&config_file_path())
    }

    /// Load configuration from a specific file path.
    ///
    /// Returns a default (empty) `Config` if the file doesn't exist or can't be parsed.
    /// Parse errors are printed to stderr.
    ///
    /// # Example
    ///
    /// ```ignore
    /// let cfg = search_hub::config::Config::load_from(&PathBuf::from("/tmp/test.toml"));
    /// ```
    pub fn load_from(path: &PathBuf) -> Self {
        if path.exists() {
            Figment::new()
                .merge(Toml::file(path))
                .extract()
                .unwrap_or_else(|e| {
                    eprintln!("Warning: failed to parse config file {:?}: {}", path, e);
                    Config::default()
                })
        } else {
            Config::default()
        }
    }
}

impl Default for Config {
    fn default() -> Self {
        Config {
            tags: Vec::new(),
            enabled_engines: None,
            tagging_enabled: None,
            tagging_threshold: None,
            exclude_urls: None,
            engines: None,
            db_path: None,
        }
    }
}

impl Config {
    /// Build the list of search engines that are currently enabled.
    ///
    /// The built-in engines from `search_engines::default_search_engines()`
    /// (e.g. `crates.io`) come first, followed by engines that have a section
    /// in the `engines` map (currently only `searxng` is recognised). Both
    /// groups go through the same filter: when `enabled_engines` is set, only
    /// IDs listed there are kept; when it is `None`, nothing is filtered out.
    ///
    /// A configured engine is also left out when its `from_config`
    /// constructor returns `None`.
    pub fn resolve_engines(&self) -> Vec<Box<dyn crate::search_engines::SearchEngine>> {
        // `None` means "no allow-list": every engine ID passes.
        let allow_list = self.enabled_engines.as_deref();
        let is_enabled = |id: &str| match allow_list {
            Some(ids) => ids.iter().any(|candidate| candidate == id),
            None => true,
        };

        let mut resolved: Vec<Box<dyn crate::search_engines::SearchEngine>> = Vec::new();

        // Built-in engines.
        for engine in crate::search_engines::default_search_engines() {
            if !is_enabled(engine.id()) {
                continue;
            }
            resolved.push(engine);
        }

        // Engines that only exist when configured. `iter().flatten()` walks
        // the map entries when `engines` is `Some` and nothing otherwise.
        for (id, settings) in self.engines.iter().flatten() {
            // Unknown engine IDs are ignored; known ones must pass the filter.
            if id.as_str() != "searxng" || !is_enabled(id) {
                continue;
            }
            if let Some(engine) = crate::search_engines::searxng::SearXng::from_config(settings) {
                resolved.push(engine);
            }
        }

        resolved
    }
}

/// Return the path where the config file is expected to live
/// (e.g. `~/.config/search_hub/config.toml` on Linux).
///
/// The path is `config.toml` inside the config directory reported by
/// `directories::ProjectDirs` for the `com` / `search_hub` / `search_hub`
/// project triple. The file is not created or checked for existence.
///
/// # Example
///
/// ```ignore
/// let path = search_hub::config::config_file_path();
/// ```
///
/// # Panics
///
/// Panics if the platform has no valid config directory.
pub fn config_file_path() -> PathBuf {
    directories::ProjectDirs::from("com", "search_hub", "search_hub")
        .expect("no valid config directory")
        .config_dir()
        .join("config.toml")
}

#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::NamedTempFile;
    use std::io::Write;

    /// Config file containing only a SearXNG engine section.
    const SEARXNG_ONLY: &str = r#"
[engines.searxng]
instance = "https://search.example.com"
"#;

    /// Write `contents` to a fresh temporary file and load it as a `Config`.
    ///
    /// The temporary file is removed when this helper returns; the loaded
    /// `Config` owns all of its data, so nothing refers to the file afterwards.
    fn load_toml(contents: &str) -> Config {
        let mut file = NamedTempFile::new().unwrap();
        file.write_all(contents.as_bytes()).unwrap();
        let path = file.path().to_path_buf();
        Config::load_from(&path)
    }

    // --- Loading -----------------------------------------------------------

    #[test]
    fn load_from_missing_file_returns_default() {
        let missing = PathBuf::from("/nonexistent/path.toml");
        let cfg = Config::load_from(&missing);
        assert!(cfg.tags.is_empty());
        assert!(cfg.engines.is_none());
    }

    #[test]
    fn load_from_valid_file_with_engines() {
        let cfg = load_toml(SEARXNG_ONLY);
        let engines = cfg.engines.unwrap();
        assert!(engines.contains_key("searxng"));
        let instance = engines["searxng"].get("instance").unwrap().as_str();
        assert_eq!(instance, Some("https://search.example.com"));
    }

    #[test]
    fn parse_error_returns_default() {
        let cfg = load_toml("invalid toml [[[");
        assert!(cfg.tags.is_empty());
        assert!(cfg.engines.is_none());
    }

    // --- Engine resolution -------------------------------------------------

    #[test]
    fn resolve_engines_includes_searxng_from_engines_map() {
        let engines = load_toml(SEARXNG_ONLY).resolve_engines();
        assert!(engines.iter().any(|e| e.id() == "searxng"));
    }

    #[test]
    fn resolve_engines_filters_with_enabled_list() {
        let cfg = load_toml(r#"
enabled_engines = ["crates.io"]
[engines.searxng]
instance = "https://search.example.com"
"#);
        let engines = cfg.resolve_engines();
        // searxng is configured but absent from enabled_engines, so it is dropped.
        assert!(!engines.iter().any(|e| e.id() == "searxng"));
        // crates.io is listed, so it stays.
        assert!(engines.iter().any(|e| e.id() == "crates.io"));
    }

    #[test]
    fn resolve_engines_includes_searxng_when_in_enabled_list() {
        let cfg = load_toml(r#"
enabled_engines = ["crates.io", "searxng"]
[engines.searxng]
instance = "https://search.example.com"
"#);
        let engines = cfg.resolve_engines();
        assert!(engines.iter().any(|e| e.id() == "crates.io"));
        assert!(engines.iter().any(|e| e.id() == "searxng"));
    }

    // --- Tagging flag ------------------------------------------------------

    #[test]
    fn tagging_enabled_defaults_to_false() {
        let cfg = Config::default();
        assert!(!cfg.tagging_enabled.unwrap_or(false));
    }

    #[test]
    fn tagging_enabled_can_be_false() {
        let cfg = load_toml(r#"tagging_enabled = false"#);
        assert_eq!(cfg.tagging_enabled, Some(false));
    }

    #[test]
    fn tagging_enabled_can_be_true() {
        let cfg = load_toml(r#"tagging_enabled = true"#);
        assert_eq!(cfg.tagging_enabled, Some(true));
    }
}