Commit
Message
Changed Files (3)
-
modified README.md
diff --git a/README.md b/README.md index c472ac9..76b72fa 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # SearchHub -A local search engine for your browser bookmarks. Import bookmarks from Firefox, Zen, Chrome, or Chromium, search them with full-text queries, and optionally forward searches to external engines like crates.io (via its public JSON API) or SearXNG (aggregates results from dozens of backends). Content is automatically tagged via local ONNX embeddings. +A local search engine for your browser bookmarks. Import bookmarks from Firefox, Zen, Chrome, or Chromium, search them with full-text queries, and optionally forward searches to external engines like crates.io (via its public JSON API) or SearXNG (aggregates results from dozens of backends). Content can be automatically tagged via local ONNX embeddings (opt-in; set `tagging_enabled = true` in config). ## Install @@ -48,9 +48,9 @@ Search queries are also forwarded to external engines: [crates.io](https://crate | `search_hub import zen` | Import from Zen Browser | | `search_hub search "query"` | Search bookmarks from the terminal | | `search_hub list` | List all bookmarks | -| `search_hub insert "Title" https://..."` | Add a bookmark (fetches content, auto-tags) | +| `search_hub insert "Title" "https://..."` | Add a bookmark (fetches content, auto-tags if enabled) | | `search_hub remove --id 1` | Delete a bookmark by ID | -| `search_hub retag --all` | Re-run auto-tagging on all bookmarks | +| `search_hub retag --all` | Re-run auto-tagging (requires `tagging_enabled = true` in config) | | `search_hub init-config` | Create a default config file at `~/.config/search_hub/config.toml` | All commands use `~/.local/share/search_hub/bookmarks.db` by default. Override with `--db-path` or set `db_path` in the config file. 
@@ -73,6 +73,9 @@ Run `search_hub init-config` to create `~/.config/search_hub/config.toml` with a # Which external search engines to use (default: ["crates.io"]) # enabled_engines = ["crates.io"] +# Enable auto-tagging (default: false, requires ONNX model download on first use) +# tagging_enabled = true + # Minimum confidence for auto-tagging (0.0 to 1.0, default: 0.6) # tagging_threshold = 0.6 -
modified src/config.rs
diff --git a/src/config.rs b/src/config.rs index 4a1c1c1..67f39f7 100644 --- a/src/config.rs +++ b/src/config.rs @@ -40,7 +40,7 @@ pub struct ForwarderDef { /// Application configuration loaded from the TOML config file. /// -/// Supports `[[tags]]`, `enabled_engines`, `tagging_threshold`, and `[engines.*]`. +/// Supports `[[tags]]`, `enabled_engines`, `tagging_enabled`, `tagging_threshold`, and `[engines.*]`. /// /// # Example /// @@ -61,6 +61,9 @@ pub struct Config { /// If `None`, all engines from `search_engines::default_search_engines()` are used. #[serde(default)] pub enabled_engines: Option<Vec<String>>, + /// Whether auto-tagging is enabled. Defaults to `false` if not set. + #[serde(default)] + pub tagging_enabled: Option<bool>, /// Tagging threshold (0.0 to 1.0). Tags with a score below this are /// discarded. Defaults to 0.60 if not set. #[serde(default)] @@ -123,6 +126,7 @@ impl Default for Config { Config { tags: Vec::new(), enabled_engines: None, + tagging_enabled: None, tagging_threshold: None, exclude_urls: None, engines: None, @@ -272,4 +276,26 @@ instance = "https://search.example.com" assert!(cfg.tags.is_empty()); assert!(cfg.engines.is_none()); } + + #[test] + fn tagging_enabled_defaults_to_false() { + let cfg = Config::default(); + assert_eq!(cfg.tagging_enabled.unwrap_or(false), false); + } + + #[test] + fn tagging_enabled_can_be_false() { + let mut file = NamedTempFile::new().unwrap(); + write!(file, r#"tagging_enabled = false"#).unwrap(); + let cfg = Config::load_from(&file.path().to_path_buf()); + assert_eq!(cfg.tagging_enabled, Some(false)); + } + + #[test] + fn tagging_enabled_can_be_true() { + let mut file = NamedTempFile::new().unwrap(); + write!(file, r#"tagging_enabled = true"#).unwrap(); + let cfg = Config::load_from(&file.path().to_path_buf()); + assert_eq!(cfg.tagging_enabled, Some(true)); + } } -
modified src/main.rs
diff --git a/src/main.rs b/src/main.rs index ec4d473..61a7960 100644 --- a/src/main.rs +++ b/src/main.rs @@ -249,6 +249,7 @@ async fn main() { None => Config::load(), }; let engines: Vec<Box<dyn SearchEngine>> = config.resolve_engines(); + let tagging_enabled = config.tagging_enabled.unwrap_or(false); let tag_threshold: f32 = config.tagging_threshold.map(|t| t as f32).unwrap_or(0.60); let exclude_hosts: Vec<String> = config.exclude_urls.clone().unwrap_or_else(|| { vec!["localhost".into(), "127.0.0.1".into(), "::1".into()] @@ -303,6 +304,10 @@ async fn main() { fetch_and_convert(&mut fetcher, &url, None) }; let md = content.as_ref().and_then(|c| { + if !tagging_enabled { + info!("tagging disabled via config"); + return None; + } info!("tagging content..."); match TaggingEngine::new(&tags, tag_threshold) { Ok(mut engine) => { @@ -366,6 +371,11 @@ async fn main() { } } Command::Retag { id, all, db_path } => { + if !tagging_enabled { + println!("Tagging is disabled in config. Enable it with tagging_enabled = true."); + return; + } + let db_path = resolve_db_path(db_path, config.db_path.as_deref()); let conn = storage::init_db(&db_path.to_string_lossy()).expect("Failed to open database"); @@ -505,6 +515,9 @@ async fn main() { # Which external search engines to use (default: [\"crates.io\"])\n\ # enabled_engines = [\"crates.io\"]\n\ \n\ + # Whether auto-tagging is enabled (default: false)\n\ + # tagging_enabled = true\n\ + \n\ # Minimum confidence for auto-tagging (0.0 to 1.0, default: 0.6)\n\ # tagging_threshold = 0.6\n\ \n\ @@ -530,15 +543,15 @@ async fn main() { match action { ImportAction::Bookmarks { source, profile, db_path } => { let db_path = resolve_db_path(db_path, config.db_path.as_deref()); - run_import(&source, profile, &db_path.to_string_lossy(), tags.clone(), tag_threshold, &exclude_hosts, ImportKind::Bookmarks).await; + run_import(&source, profile, &db_path.to_string_lossy(), tags.clone(), tagging_enabled, tag_threshold, &exclude_hosts, 
ImportKind::Bookmarks).await; } ImportAction::History { source, profile, db_path } => { let db_path = resolve_db_path(db_path, config.db_path.as_deref()); - run_import(&source, profile, &db_path.to_string_lossy(), tags.clone(), tag_threshold, &exclude_hosts, ImportKind::History).await; + run_import(&source, profile, &db_path.to_string_lossy(), tags.clone(), tagging_enabled, tag_threshold, &exclude_hosts, ImportKind::History).await; } ImportAction::All { source, profile, db_path } => { let db_path = resolve_db_path(db_path, config.db_path.as_deref()); - run_import(&source, profile, &db_path.to_string_lossy(), tags.clone(), tag_threshold, &exclude_hosts, ImportKind::All).await; + run_import(&source, profile, &db_path.to_string_lossy(), tags.clone(), tagging_enabled, tag_threshold, &exclude_hosts, ImportKind::All).await; } } } @@ -598,7 +611,7 @@ fn resolve_profiles(importer: &(impl Importer + ?Sized), profile: Option<String> } } -async fn run_import(source: &str, profile: Option<String>, db_path: &str, tags: Vec<TagDef>, tag_threshold: f32, exclude_hosts: &[String], kind: ImportKind) { +async fn run_import(source: &str, profile: Option<String>, db_path: &str, tags: Vec<TagDef>, tagging_enabled: bool, tag_threshold: f32, exclude_hosts: &[String], kind: ImportKind) { let importer: Box<dyn Importer> = match source { "firefox" => Box::new(FirefoxImporter), "zen" => Box::new(ZenImporter), @@ -705,6 +718,7 @@ async fn run_import(source: &str, profile: Option<String>, db_path: &str, tags: let task_tags = tags.clone(); let task_threshold = tag_threshold; let task_exclude = exclude_hosts.to_vec(); + let task_tagging_enabled = tagging_enabled; tokio::task::spawn_blocking(move || { let mut fetcher = match Fetcher::new() { Ok(f) => f, @@ -713,7 +727,11 @@ async fn run_import(source: &str, profile: Option<String>, db_path: &str, tags: return; } }; - let mut tagger = TaggingEngine::new(&task_tags, task_threshold).ok(); + let mut tagger = if task_tagging_enabled { + 
TaggingEngine::new(&task_tags, task_threshold).ok() + } else { + None + }; let conn = storage::init_db(&db) .expect("Failed to open target database");