Commit
Message
Changed Files (6)
-
modified README.md
diff --git a/README.md b/README.md index 3b1fc7b..c472ac9 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ A local search engine for your browser bookmarks. Import bookmarks from Firefox, ## Install -**Binaries** are available at [vit.am/~ololduck/search_hub/latest](https://vit.am/~ololduck/search_hub/latest/) — download the archive or the statically-linked binary for your architecture, extract, and run. +**Binaries** are available at [vit.am/~ololduck/search_hub/latest](https://vit.am/~ololduck/search_hub/latest/). Download the archive or the statically-linked binary for your architecture, extract, and run. **Source:** Clone the [repository](https://vit.am/~ololduck/search_hub/repository.git) and build with Rust: @@ -55,7 +55,7 @@ Search queries are also forwarded to external engines: [crates.io](https://crate All commands use `~/.local/share/search_hub/bookmarks.db` by default. Override with `--db-path` or set `db_path` in the config file. -The first time you use a search or insert command, SearchHub downloads an ONNX embedding model to `.fastembed_cache/` in the project directory (about 30 MB). +The first time you use a search or insert command, SearchHub downloads an ONNX embedding model to `.fastembed_cache/` in the project directory (about 127 MB). ## Configuration @@ -99,7 +99,7 @@ systemctl --user enable --now search-hub-web.service Check status with `systemctl --user status search-hub-web`. View logs with `journalctl --user -u search-hub-web -f`. -## Auto-import with systemd (Zen Browser) +## Auto-import with systemd ```sh cp contrib/search-hub-import.service ~/.config/systemd/user/ @@ -108,7 +108,7 @@ systemctl --user daemon-reload systemctl --user enable --now search-hub-import.timer ``` -This imports bookmarks from Zen Browser daily. +This imports bookmarks from Zen Browser daily. Edit the file to import from another browser. ## Resources -
modified abbaye.toml
diff --git a/abbaye.toml b/abbaye.toml index 25233ab..6042789 100644 --- a/abbaye.toml +++ b/abbaye.toml @@ -1,4 +1,4 @@ -"$shema" = "https://vit.am/~ololduck/abbaye/latest/dist/abbaye.schema.json" +"$schema" = "https://vit.am/~ololduck/abbaye/latest/dist/abbaye.schema.json" [site] name = "search_hub" @@ -12,6 +12,7 @@ dirty_suffix = "-dirty" [changelog] [git_ui] +clone_url = "https://vit.am/~ololduck/search_hub/repository.git" [[builders]] type = "archive" -
modified src/search_engines/crates_io.rs
diff --git a/src/search_engines/crates_io.rs b/src/search_engines/crates_io.rs index 9e34d8c..c9fadad 100644 --- a/src/search_engines/crates_io.rs +++ b/src/search_engines/crates_io.rs @@ -89,3 +89,38 @@ impl SearchEngine for CratesIo { pub fn engine() -> CratesIo { CratesIo } + +#[cfg(test)] +mod tests { + use super::*; + use crate::search_engines::SearchEngine; + + #[test] + fn test_id() { + assert_eq!(CratesIo.id(), "crates.io"); + } + + #[test] + fn test_name() { + assert_eq!(CratesIo.name(), "crates.io"); + } + + #[test] + fn test_url_template() { + assert_eq!( + CratesIo.url_template(), + "https://crates.io/api/v1/crates?q={}&per_page=10" + ); + } + + #[test] + fn test_selector() { + assert_eq!(CratesIo.selector(), ""); + } + + #[test] + fn test_engine_construct() { + let e = engine(); + assert_eq!(e.id(), "crates.io"); + } +} -
modified src/search_engines/mod.rs
diff --git a/src/search_engines/mod.rs b/src/search_engines/mod.rs index 216f8df..31a2158 100644 --- a/src/search_engines/mod.rs +++ b/src/search_engines/mod.rs @@ -206,3 +206,171 @@ fn urlencode(s: &str) -> String { } out } + +#[cfg(test)] +mod tests { + use super::*; + use async_trait::async_trait; + + struct TestEngine; + + #[async_trait] + impl SearchEngine for TestEngine { + fn id(&self) -> &str { "test" } + fn name(&self) -> &str { "Test" } + fn url_template(&self) -> &str { "https://example.com/?q={}" } + fn selector(&self) -> &str { "div.results" } + } + + #[test] + fn test_urlencode_plain() { + assert_eq!(urlencode("hello"), "hello"); + } + + #[test] + fn test_urlencode_spaces() { + assert_eq!(urlencode("hello world"), "hello+world"); + } + + #[test] + fn test_urlencode_special() { + assert_eq!(urlencode("a&b/c"), "a%26b%2Fc"); + } + + #[test] + fn test_urlencode_empty() { + assert_eq!(urlencode(""), ""); + } + + #[test] + fn test_urlencode_alphanum() { + assert_eq!(urlencode("ABC123-_~."), "ABC123-_~."); + } + + #[test] + fn test_engine_error_display() { + let e = EngineError("oops".into()); + assert_eq!(format!("{e}"), "oops"); + } + + #[test] + fn test_engine_error_debug() { + let e = EngineError("oops".into()); + assert!(format!("{e:?}").contains("EngineError")); + } + + #[test] + fn test_engine_error_is_error() { + use std::error::Error; + let e = EngineError("oops".into()); + assert!(e.source().is_none()); + } + + #[test] + fn test_search_url_replaces_placeholder() { + let e = TestEngine; + assert_eq!(e.search_url("rust"), "https://example.com/?q=rust"); + } + + #[test] + fn test_search_url_encodes_query() { + let e = TestEngine; + assert_eq!( + e.search_url("hello world"), + "https://example.com/?q=hello+world" + ); + } + + #[test] + fn test_search_url_special_chars() { + let e = TestEngine; + assert_eq!( + e.search_url("a&b"), + "https://example.com/?q=a%26b" + ); + } + + #[test] + fn test_default_search_engines() { + let engines = default_search_engines(); + assert_eq!(engines.len(), 1); + assert_eq!(engines[0].id(), "crates.io"); + } + + #[test] + fn test_parse_results_empty_html() { + let e = TestEngine; + let result = e.parse_results("<html></html>"); + assert!(result.is_err()); + assert_eq!(format!("{}", result.unwrap_err()), "no container matched"); + } + + #[test] + fn test_parse_results_no_links() { + let e = TestEngine; + let html = r#"<html><body><div class="results"><p>nothing here</p></div></body></html>"#; + let result = e.parse_results(html); + assert!(result.is_err()); + assert_eq!(format!("{}", result.unwrap_err()), "no results found"); + } + + #[test] + fn test_parse_results_extracts_links() { + let e = TestEngine; + let html = r#"<html><body><div class="results"><a href="https://example.com/1">First Result</a><a href="https://example.com/2">Second Result</a></div></body></html>"#; + let result = e.parse_results(html); + assert!(result.is_ok()); + let entries = result.unwrap(); + assert_eq!(entries.len(), 2); + assert_eq!(entries[0].title, "First Result"); + assert_eq!(entries[0].url, "https://example.com/1"); + assert_eq!(entries[0].engine, "Test"); + assert_eq!(entries[1].title, "Second Result"); + assert_eq!(entries[1].url, "https://example.com/2"); + } + + #[test] + fn test_parse_results_short_title_skipped() { + let e = TestEngine; + let html = r#"<html><body><div class="results"><a href="https://example.com/1">ab</a><a href="https://example.com/2">Long Title</a></div></body></html>"#; + let result = e.parse_results(html); + assert!(result.is_ok()); + let entries = result.unwrap(); + assert_eq!(entries.len(), 1); + assert_eq!(entries[0].url, "https://example.com/2"); + } + + #[test] + fn test_parse_results_dedup_by_url() { + let e = TestEngine; + let html = r#"<html><body><div class="results"><a href="https://example.com/1">First</a><a href="https://example.com/1">First Duplicate</a></div></body></html>"#; + let result = e.parse_results(html); + assert!(result.is_ok()); + let entries = result.unwrap(); + assert_eq!(entries.len(), 1); + } + + #[test] + fn test_parse_results_non_http_skipped() { + let e = TestEngine; + let html = r#"<html><body><div class="results"><a href="https://example.com/1">Valid</a><a href="javascript:void(0)">JS Link</a><a href="mailto:test@test.com">Email</a></div></body></html>"#; + let result = e.parse_results(html); + assert!(result.is_ok()); + let entries = result.unwrap(); + assert_eq!(entries.len(), 1); + assert_eq!(entries[0].url, "https://example.com/1"); + } + + #[test] + fn test_parse_results_max_ten() { + let e = TestEngine; + let mut links = String::new(); + for i in 0..15 { + links.push_str(&format!(r#"<a href="https://example.com/{i}">Result {i}</a>"#)); + } + let html = format!(r#"<html><body><div class="results">{links}</div></body></html>"#); + let result = e.parse_results(&html); + assert!(result.is_ok()); + assert_eq!(result.unwrap().len(), 10); + } +} -
modified src/search_engines/searxng.rs
diff --git a/src/search_engines/searxng.rs b/src/search_engines/searxng.rs index 573c108..ea9699e 100644 --- a/src/search_engines/searxng.rs +++ b/src/search_engines/searxng.rs @@ -91,3 +91,100 @@ impl SearchEngine for SearXng { } } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::search_engines::SearchEngine; + + #[test] + fn test_id() { + let e = SearXng { + instance: "https://example.com".into(), + url_tpl: "https://example.com/search?format=json&q={}".into(), + }; + assert_eq!(e.id(), "searxng"); + } + + #[test] + fn test_name() { + let e = SearXng { + instance: "https://example.com".into(), + url_tpl: "https://example.com/search?format=json&q={}".into(), + }; + assert_eq!(e.name(), "SearXNG"); + } + + #[test] + fn test_selector() { + let e = SearXng { + instance: "https://example.com".into(), + url_tpl: "https://example.com/search?format=json&q={}".into(), + }; + assert_eq!(e.selector(), ""); + } + + #[test] + fn test_url_template_returns_from_struct() { + let e = SearXng { + instance: "https://my-instance.net".into(), + url_tpl: "https://my-instance.net/search?format=json&q={}".into(), + }; + assert_eq!( + e.url_template(), + "https://my-instance.net/search?format=json&q={}" + ); + } + + #[test] + fn test_from_config_valid() { + let mut config = toml::Table::new(); + config.insert("instance".into(), toml::Value::String("https://search.example.com".into())); + let result = SearXng::from_config(&config); + assert!(result.is_some()); + let engine = result.unwrap(); + assert_eq!(engine.id(), "searxng"); + assert_eq!( + engine.url_template(), + "https://search.example.com/search?format=json&q={}" + ); + } + + #[test] + fn test_from_config_trailing_slash_stripped() { + let mut config = toml::Table::new(); + config.insert("instance".into(), toml::Value::String("https://search.example.com/".into())); + let result = SearXng::from_config(&config); + assert!(result.is_some()); + let engine = result.unwrap(); + assert_eq!( + engine.url_template(), + "https://search.example.com/search?format=json&q={}" + ); + } + + #[test] + fn test_from_config_missing_instance() { + let config = toml::Table::new(); + assert!(SearXng::from_config(&config).is_none()); + } + + #[test] + fn test_from_config_non_string_instance() { + let mut config = toml::Table::new(); + config.insert("instance".into(), toml::Value::Integer(42)); + assert!(SearXng::from_config(&config).is_none()); + } + + #[test] + fn test_search_url_uses_template() { + let e = SearXng { + instance: "https://example.com".into(), + url_tpl: "https://example.com/search?format=json&q={}".into(), + }; + assert_eq!( + e.search_url("tokio"), + "https://example.com/search?format=json&q=tokio" + ); + } +} -
added tests/search_engines_integration.rs
diff --git a/tests/search_engines_integration.rs b/tests/search_engines_integration.rs new file mode 100644 index 0000000..9bb1151 --- /dev/null +++ b/tests/search_engines_integration.rs @@ -0,0 +1,107 @@ +use std::sync::OnceLock; + +use search_hub::search_engines::SearchEngine; + +static RT: OnceLock<tokio::runtime::Runtime> = OnceLock::new(); + +fn rt() -> &'static tokio::runtime::Runtime { + RT.get_or_init(|| tokio::runtime::Runtime::new().unwrap()) +} + +fn client() -> reqwest::Client { + reqwest::Client::builder() + .user_agent("search_hub_test") + .build() + .unwrap() +} + +#[test] +fn crates_io_returns_results_for_generic_query() { + let engine = search_hub::search_engines::crates_io::CratesIo; + let client = client(); + + let results = rt().block_on(engine.fetch_results("tokio", &client)); + + assert!(results.is_ok(), "crates.io search should succeed: {:?}", results.err()); + let entries = results.unwrap(); + assert!(!entries.is_empty(), "should return at least one crate for 'tokio'"); + assert!(entries.len() <= 10, "max 10 results"); + + for entry in &entries { + assert!(!entry.title.is_empty(), "title should not be empty"); + assert!(!entry.url.is_empty(), "url should not be empty"); + assert!(entry.url.starts_with("http"), "url should start with http"); + assert_eq!(entry.engine, "crates.io", "engine should be crates.io"); + } + + println!("crates.io returned {} results for 'tokio':", entries.len()); + for e in &entries { + println!(" - {} ({})", e.title, e.url); + } +} + +#[test] +fn crates_io_search_uses_https_urls() { + let engine = search_hub::search_engines::crates_io::CratesIo; + let client = client(); + + let results = rt().block_on(engine.fetch_results("serde", &client)); + + assert!(results.is_ok(), "crates.io search should succeed: {:?}", results.err()); + let entries = results.unwrap(); + assert!(!entries.is_empty(), "should return at least one crate for 'serde'"); + + for entry in &entries { + assert!( + entry.url.starts_with("https://"), + "url should be https: {}", + entry.url + ); + } +} + +#[test] +fn crates_io_empty_query_returns_error() { + let engine = search_hub::search_engines::crates_io::CratesIo; + let client = client(); + + let results = rt().block_on(engine.fetch_results("zzzzzzzzzz_nonexistent_crate_xxxxxxxxx", &client)); + + assert!(results.is_err(), "should return error for nonsense query"); +} + +#[test] +fn searxng_returns_results_if_configured() { + let instance = match std::env::var("SEARCH_HUB_SEARXNG_INSTANCE") { + Ok(v) => v, + Err(_) => { + eprintln!("skipping searxng test: SEARCH_HUB_SEARXNG_INSTANCE not set"); + return; + } + }; + + let engine = search_hub::search_engines::searxng::SearXng { + instance: instance.clone(), + url_tpl: format!("{}/search?format=json&q={{}}", instance.trim_end_matches('/')), + }; + let client = client(); + + let results = rt().block_on(engine.fetch_results("rust", &client)); + + match results { + Ok(entries) => { + assert!(!entries.is_empty(), "searxng should return results for 'rust'"); + for entry in &entries { + assert!(!entry.title.is_empty(), "title should not be empty"); + assert!(!entry.url.is_empty(), "url should not be empty"); + } + println!("searxng returned {} results for 'rust':", entries.len()); + for e in &entries { + println!(" [{}] {} ({})", e.engine, e.title, e.url); + } + } + Err(e) => { + panic!("searxng search failed: {e}"); + } + } +}