Commit
Message
Changed Files (4)
-
modified README.md
diff --git a/README.md b/README.md index adb95d0..f87a5b6 100644 --- a/README.md +++ b/README.md @@ -109,6 +109,22 @@ instance = "https://search.kael.ink" # type = "mdn" # locale = "fr" # timeout_secs = 5.0 + +# Generic HTML-scraped engine (use with any search site) +# Provide a URL template with `{}` for the query and a CSS selector +# targeting the result container. Results are extracted from `<a>` links +# inside that container (deduplicated, up to 10, http/https only). +# +# Note: most commercial search engines (Google, Bing, DuckDuckGo, etc.) +# block automated requests. This engine works best with small/niche sites +# that don't enforce bot detection. To find the right selector, view the +# page source or use browser dev tools on the search results page. +# [[engines]] +# type = "generic" +# name = "DuckDuckGo" +# url = "https://html.duckduckgo.com/html/?q={}" +# selector = "div.results" +# timeout_secs = 10.0 ``` ## Run the web server as a systemd user service @@ -145,6 +161,47 @@ systemctl --user enable --now search-hub-self-update.timer This checks for new releases weekly and updates the binary automatically. +## Run with Podman / Docker + +A container image is available at `oci.vit.am/search-hub:latest`. It serves on port 8080 as the `search_hub` user and expects: + +- **Config** mounted at `/home/search_hub/.config/search_hub/config.toml` +- **Database** directory mounted at `/home/search_hub/.local/share/search_hub/` + +```sh +# Pull and run +podman run -d --name search-hub \ + -p 8080:8080 \ + -v ~/.config/search_hub:/home/search_hub/.config/search_hub:ro \ + -v ~/.local/share/search_hub:/home/search_hub/.local/share/search_hub \ + oci.vit.am/search-hub:latest serve + +# SIGHUP reload (re-reads config) +podman kill -s HUP search-hub + +# Build locally from the Containerfile +podman build -t search-hub:latest -f Containerfile . +``` + +### docker-compose + +```sh +docker compose up -d +``` + +See `docker-compose.yaml` at the project root. A SearXNG service is included as a commented-out example. + +### Podman Quadlet (systemd-native) + +```sh +mkdir -p ~/.config/containers/systemd +cp contrib/search-hub.container ~/.config/containers/systemd/ +systemctl --user daemon-reload +systemctl --user enable --now search-hub +``` + +The Quadlet file uses `%h` (your home directory) for volume source paths. + ## Resources - **Downloads:** [vit.am/~ololduck/search_hub/latest](https://vit.am/~ololduck/search_hub/latest/) -
modified src/config.rs
diff --git a/src/config.rs b/src/config.rs index 4dcac3a..e59bec0 100644 --- a/src/config.rs +++ b/src/config.rs @@ -41,6 +41,17 @@ pub enum EngineConfig { #[serde(default)] timeout_secs: Option<f32>, }, + /// Generic HTML-scraped search engine (configurable URL + CSS selector) + Generic { + /// Display name for this engine (e.g. "DuckDuckGo", "Stack Overflow"). + name: String, + /// URL template with `{}` placeholder for the query. + url: String, + /// CSS selector targeting the result container. + selector: String, + #[serde(default)] + timeout_secs: Option<f32>, + }, } #[async_trait] @@ -51,6 +62,7 @@ impl SearchEngine for EngineConfig { EngineConfig::SearXng { .. } => "searxng", EngineConfig::Wikipedia { .. } => "wikipedia", EngineConfig::Mdn { .. } => "mdn", + EngineConfig::Generic { .. } => "generic", } } @@ -60,15 +72,22 @@ impl SearchEngine for EngineConfig { EngineConfig::SearXng { .. } => "SearXNG", EngineConfig::Wikipedia { .. } => "Wikipedia", EngineConfig::Mdn { .. } => "MDN", + EngineConfig::Generic { name, .. } => name, } } fn url_template(&self) -> &str { - "" + match self { + EngineConfig::Generic { url, .. } => url, + _ => "", + } } fn selector(&self) -> &str { - "" + match self { + EngineConfig::Generic { selector, .. } => selector, + _ => "", + } } fn timeout(&self) -> Duration { @@ -76,7 +95,8 @@ impl SearchEngine for EngineConfig { EngineConfig::CratesIo { timeout_secs, .. } | EngineConfig::SearXng { timeout_secs, .. } | EngineConfig::Wikipedia { timeout_secs, .. } - | EngineConfig::Mdn { timeout_secs, .. } => timeout_secs, + | EngineConfig::Mdn { timeout_secs, .. } + | EngineConfig::Generic { timeout_secs, .. } => timeout_secs, }; secs.map(|s| Duration::from_secs_f32(s)).unwrap_or(Duration::from_secs(5)) } @@ -116,6 +136,15 @@ impl SearchEngine for EngineConfig { }; engine.fetch_results(query, client).await } + EngineConfig::Generic { name, url, selector, timeout_secs } => { + let engine = crate::search_engines::generic::Generic { + name: name.clone(), + url: url.clone(), + selector: selector.clone(), + timeout_secs: *timeout_secs, + }; + engine.fetch_results(query, client).await + } } } } -
added src/search_engines/generic.rs
diff --git a/src/search_engines/generic.rs b/src/search_engines/generic.rs new file mode 100644 index 0000000..d064ccd --- /dev/null +++ b/src/search_engines/generic.rs @@ -0,0 +1,43 @@ +use async_trait::async_trait; +use std::time::Duration; + +use super::SearchEngine; + +/// A fully configurable search engine defined by the user in config. +/// +/// The user provides a URL template with `{}` placeholder, a CSS selector +/// for the result container, and a display name. Result extraction uses +/// the trait's default `parse_results` (finds container by selector, +/// extracts `<a href>` links, deduplicates, returns up to 10 results). +#[derive(Debug, Clone)] +pub struct Generic { + pub name: String, + pub url: String, + pub selector: String, + pub timeout_secs: Option<f32>, +} + +#[async_trait] +impl SearchEngine for Generic { + fn id(&self) -> &str { + "generic" + } + + fn name(&self) -> &str { + &self.name + } + + fn url_template(&self) -> &str { + &self.url + } + + fn selector(&self) -> &str { + &self.selector + } + + fn timeout(&self) -> Duration { + self.timeout_secs + .map(|s| Duration::from_secs_f32(s)) + .unwrap_or(Duration::from_secs(5)) + } +} -
modified src/search_engines/mod.rs
diff --git a/src/search_engines/mod.rs b/src/search_engines/mod.rs index dad696f..58e42e5 100644 --- a/src/search_engines/mod.rs +++ b/src/search_engines/mod.rs @@ -1,4 +1,5 @@ pub mod crates_io; +pub mod generic; pub mod mdn; pub mod searxng; pub mod utils;