| @@ -39,7 +39,9 @@ impl Fetcher { |
| let client = reqwest::Client::builder() |
| .user_agent(USER_AGENT) |
| .build()?; |
| - let rt = tokio::runtime::Runtime::new()?; |
| + let rt = tokio::runtime::Builder::new_current_thread() |
| + .enable_time() |
| + .build()?; |
| let cache = Self::load_cache(&cache_path); |
| Ok(Self { client, rt, robots_cache: Mutex::new(cache), cache_path }) |
| } |
| @@ -58,9 +60,10 @@ impl Fetcher { |
| } |
| |
| fn save_cache(&self) { |
| - let cache = self.robots_cache.lock().unwrap(); |
| - if let Ok(data) = serde_json::to_string(&*cache) { |
| - let _ = fs::write(&self.cache_path, data); |
| + if let Ok(cache) = self.robots_cache.lock() { |
| + if let Ok(data) = serde_json::to_string(&*cache) { |
| + let _ = fs::write(&self.cache_path, data); |
| + } |
| } |
| } |
| |
| @@ -73,8 +76,11 @@ impl Fetcher { |
| let domain = parsed.host_str().unwrap_or("").to_string(); |
| let one_month = chrono::Duration::days(30); |
| |
| - { |
| - let mut cache = self.robots_cache.lock().unwrap(); |
| + let allowed = { |
| + let mut cache = match self.robots_cache.lock() { |
| + Ok(c) => c, |
| + Err(_) => return Err("internal error: lock poisoned".into()), |
| + }; |
| let expired = cache.get(&domain).map_or(true, |e| Utc::now() - e.fetched_at > one_month); |
| if !cache.contains_key(&domain) || expired { |
| let robots_url = format!("{}://{}/robots.txt", parsed.scheme(), domain); |
| @@ -84,10 +90,12 @@ impl Fetcher { |
| }; |
| cache.insert(domain.clone(), CacheEntry { body, fetched_at: Utc::now() }); |
| } |
| - } |
| + drop(cache); |
| |
| - let allowed = { |
| - let cache = self.robots_cache.lock().unwrap(); |
| + let cache = match self.robots_cache.lock() { |
| + Ok(c) => c, |
| + Err(_) => return Err("internal error: lock poisoned".into()), |
| + }; |
| DefaultMatcher::default() |
| .one_agent_allowed_by_robots(&cache[&domain].body, USER_AGENT, url) |
| }; |
| @@ -793,9 +801,19 @@ async fn run_import(source: &str, profile: Option<String>, db_path: &str, tags: |
| return; |
| } |
| |
| - pb.set_message("fetching and tagging..."); |
| + pb.finish_and_clear(); |
| |
| // Second pass: parallel fetch + tag via worker pool |
| + let pb = ProgressBar::new(pending.len() as u64); |
| + pb.enable_steady_tick(std::time::Duration::from_millis(100)); |
| + pb.set_style( |
| + ProgressStyle::default_bar() |
| + .template("{spinner:.green} [{elapsed_precise}] {bar:40.cyan/blue} {pos}/{len} {msg}") |
| + .unwrap() |
| + .progress_chars("##-"), |
| + ); |
| + pb.set_message("fetching and tagging..."); |
| + |
| let (tx, rx) = std::sync::mpsc::channel::<String>(); |
| let bar = Arc::new(pb.clone()); |
| let db = db_path.to_string(); |