search_hub

at 4551d56 Raw

use crate::search_engines::{ResultEntry, SearchEngine};
use crate::storage;
use actix_web::{get, web, App, HttpRequest, HttpResponse, HttpServer, Responder};
use rusqlite::Connection;
use std::sync::Mutex;
use std::time::Instant;
use tera::Tera;
use tracing::{error, info};

/// Default `User-Agent` string: `search_hub/<crate version>`.
///
/// The version comes from `CARGO_PKG_VERSION` at compile time. The `search`
/// handler falls back to this value when the incoming request carries no
/// usable `User-Agent` header of its own.
const USER_AGENT: &str = concat!("search_hub/", env!("CARGO_PKG_VERSION"));

/// Shared handle to the application's single database connection.
///
/// Despite the name this is not a pool: it wraps one [`Connection`] in a
/// [`Mutex`] so that request handlers can take turns using it.
pub struct DbPool(Mutex<Connection>);

impl DbPool {
    /// Opens the database at `path` through `storage::init_db`.
    ///
    /// # Panics
    ///
    /// Panics if `storage::init_db` returns an error.
    pub fn new(path: &str) -> Self {
        let conn = storage::init_db(path).expect("Failed to initialize database");
        DbPool(Mutex::new(conn))
    }

    /// Locks the connection and returns the guard; blocks until the lock is
    /// available.
    ///
    /// A poisoned mutex (an earlier holder panicked) is recovered instead of
    /// propagated: otherwise a single panic while the guard is held would make
    /// every later call panic as well, taking down all database-backed
    /// requests for the rest of the process lifetime.
    pub fn conn(&self) -> std::sync::MutexGuard<'_, Connection> {
        self.0
            .lock()
            .unwrap_or_else(|poisoned| poisoned.into_inner())
    }
}

/// Version string exposed to templates as `version`, formatted as
/// `<crate version> (<git hash>)`.
///
/// Both parts are read at compile time; `SEARCH_HUB_GIT_HASH` must be set in
/// the build environment (presumably by a build script — confirm).
const VERSION: &str = concat!(
    env!("CARGO_PKG_VERSION"),
    " (",
    env!("SEARCH_HUB_GIT_HASH"),
    ")",
);

#[get("/")]
async fn index(templates: web::Data<Tera>, port: web::Data<Port>) -> impl Responder {
    info!("serving index page");
    let mut ctx = tera::Context::new();
    ctx.insert("version", VERSION);
    ctx.insert("port", &(**port).0);
    match templates.render("index.html", &ctx) {
        Ok(rendered) => HttpResponse::Ok().content_type("text/html").body(rendered),
        Err(e) => {
            error!("Template error: {}", e);
            HttpResponse::InternalServerError().finish()
        }
    }
}

#[get("/opensearch.xml")]
async fn opensearch(templates: web::Data<Tera>, port: web::Data<Port>) -> impl Responder {
    let mut ctx = tera::Context::new();
    ctx.insert("port", &(**port).0);
    match templates.render("opensearch.xml", &ctx) {
        Ok(xml) => HttpResponse::Ok().content_type("application/opensearchdescription+xml").body(xml),
        Err(e) => {
            error!("Template error: {}", e);
            HttpResponse::InternalServerError().finish()
        }
    }
}

/// Newtype around the TCP port the server is bound to.
///
/// Registered as shared application data in `run_server` so handlers can
/// expose the port number to templates as `port`.
struct Port(u16);

#[get("/search")]
async fn search(
    req: HttpRequest,
    query: web::Query<SearchQuery>,
    templates: web::Data<Tera>,
    db_pool: web::Data<DbPool>,
    engines: web::Data<Vec<Box<dyn SearchEngine>>>,
) -> impl Responder {
    let start = Instant::now();
    let q = query.q.as_deref().unwrap_or("");
    let page = query.page.unwrap_or(1).max(1);
    let page_size: usize = 20;
    let has_query = !q.is_empty();
    info!("search request: query=\"{}\" page={}", q, page);

    let total_results = if has_query {
        storage::count_search_bookmarks(&db_pool.conn(), q).unwrap_or(0)
    } else {
        storage::count_bookmarks(&db_pool.conn()).unwrap_or(0)
    };
    let total_pages = (total_results + page_size - 1) / page_size;

    let bookmarks = if has_query {
        storage::search_bookmarks(&db_pool.conn(), q, page, page_size).unwrap_or_default()
    } else {
        storage::list_bookmarks(&db_pool.conn(), page, page_size).unwrap_or_default()
    };

    let user_agent = req
        .headers()
        .get("User-Agent")
        .and_then(|v| v.to_str().ok())
        .unwrap_or(USER_AGENT);

    let mut all_external: Vec<Vec<ResultEntry>> = Vec::new();
    let mut provider_count: usize = 0;
    if has_query {
        let client = reqwest::Client::builder()
            .user_agent(user_agent)
            .build()
            .ok();
        if let Some(client) = client {
            let mut handles = Vec::new();
            let engines = engines.clone();
            for i in 0..engines.len() {
                let engine_name = engines[i].name().to_string();
                let q_owned = q.to_string();
                let client = client.clone();
                let engines = engines.clone();
                handles.push(tokio::spawn(async move {
                    let t0 = Instant::now();
                    let result = engines[i].fetch_results(&q_owned, &client).await;
                    let elapsed = t0.elapsed();
                    (engine_name, result, elapsed)
                }));
            }
            for handle in handles {
                if let Ok((name, result, elapsed)) = handle.await {
                    provider_count += 1;
                    match result {
                        Ok(results) => {
                            info!(
                                "external {} ({} results) [{:.2?}]",
                                name,
                                results.len(),
                                elapsed
                            );
                            all_external.push(results);
                        }
                        Err(e) => {
                            info!("external {} (error) [{:.2?}]: {}", name, elapsed, e);
                        }
                    }
                }
            }
        }
    }

    let external_results = interleave(&all_external);

    let page_elapsed = start.elapsed();
    let page_time_ms = format!("{:.1}", page_elapsed.as_secs_f64() * 1000.0);
    info!(
        "search completed: {} bookmark results, {} external providers [{:.2?}]",
        bookmarks.len(),
        provider_count,
        page_elapsed
    );

    let mut external_engines: Vec<String> = external_results
        .iter()
        .map(|r| r.engine.clone())
        .collect();
    external_engines.sort();
    external_engines.dedup();

    let mut ctx = tera::Context::new();
    ctx.insert("bookmarks", &bookmarks);
    ctx.insert("query", &q);
    ctx.insert("page", &page);
    ctx.insert("total_pages", &total_pages);
    ctx.insert("total_results", &total_results);
    ctx.insert("version", VERSION);
    ctx.insert("page_time_ms", &page_time_ms);
    ctx.insert("external_results", &external_results);
    ctx.insert("external_engines", &external_engines);

    match templates.render("index.html", &ctx) {
        Ok(rendered) => HttpResponse::Ok().content_type("text/html").body(rendered),
        Err(e) => {
            error!("Template error: {}", e);
            HttpResponse::InternalServerError().finish()
        }
    }
}

/// Query-string parameters accepted by `GET /search`.
#[derive(serde::Deserialize)]
pub struct SearchQuery {
    /// Search text. When absent or empty the handler lists all bookmarks
    /// instead of searching.
    pub q: Option<String>,
    /// 1-based page number. The handler defaults it to 1 and raises values
    /// below 1 to 1.
    pub page: Option<usize>,
}

/// Builds the shared application state and runs the HTTP server on
/// `127.0.0.1:port` with two workers until it shuts down.
///
/// * `db_path` – database location handed to `DbPool::new`.
/// * `port` – TCP port to bind; also exposed to handlers as `Port`.
/// * `engines` – external search engines made available to the handlers.
///
/// # Errors
///
/// Returns the I/O error from binding the socket or from running the server.
///
/// # Panics
///
/// Panics if the database cannot be initialised or if an embedded template
/// fails to parse.
pub async fn run_server(
    db_path: &str,
    port: u16,
    engines: Vec<Box<dyn SearchEngine>>,
) -> std::io::Result<()> {
    // Database first, so its initialisation failure surfaces before anything else.
    let shared_db = web::Data::new(DbPool::new(db_path));

    // Templates are compiled into the binary: (name, source, panic message).
    let embedded = [
        (
            "index.html",
            include_str!("../templates/index.html"),
            "Failed to parse index template",
        ),
        (
            "opensearch.xml",
            include_str!("../templates/opensearch.xml"),
            "Failed to parse opensearch template",
        ),
    ];
    let mut registry = Tera::default();
    for (name, source, failure) in embedded {
        registry.add_raw_template(name, source).expect(failure);
    }

    let shared_templates = web::Data::new(registry);
    let shared_engines = web::Data::new(engines);
    let shared_port = web::Data::new(Port(port));

    // Invoked once per worker; each call clones the shared handles.
    let app_factory = move || {
        App::new()
            .app_data(shared_templates.clone())
            .app_data(shared_db.clone())
            .app_data(shared_engines.clone())
            .app_data(shared_port.clone())
            .service(index)
            .service(search)
            .service(opensearch)
    };

    let server = HttpServer::new(app_factory)
        .workers(2)
        .bind(("127.0.0.1", port))?;
    server.run().await
}

/// Merges per-engine result lists round-robin: first every engine's top
/// entry (in engine order), then every engine's second entry, and so on.
///
/// Engines that run out of entries are skipped in later rounds, so the output
/// contains every input entry exactly once. Entries are cloned; the input is
/// left untouched.
fn interleave(per_engine: &[Vec<ResultEntry>]) -> Vec<ResultEntry> {
    // The longest list decides how many rounds are needed.
    let rounds = per_engine.iter().map(Vec::len).max().unwrap_or(0);
    let mut merged = Vec::with_capacity(rounds * per_engine.len());
    for rank in 0..rounds {
        merged.extend(
            per_engine
                .iter()
                .filter_map(|hits| hits.get(rank))
                .cloned(),
        );
    }
    merged
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::models::Bookmark;
    use chrono::Utc;

    /// Builds a Tera instance containing only the embedded `index.html`.
    fn test_tera() -> Tera {
        let mut engine = Tera::default();
        engine
            .add_raw_template("index.html", include_str!("../templates/index.html"))
            .expect("template parse");
        engine
    }

    /// Renders `index.html` with `ctx`, panicking if rendering fails.
    fn render_index(ctx: &tera::Context) -> String {
        test_tera().render("index.html", ctx).expect("render")
    }

    /// Context for a first-page search with no external results.
    fn listing_context(
        query: &str,
        total_pages: usize,
        total_results: usize,
        bookmarks: &[Bookmark],
    ) -> tera::Context {
        let mut ctx = tera::Context::new();
        ctx.insert("query", query);
        ctx.insert("page", &1usize);
        ctx.insert("total_pages", &total_pages);
        ctx.insert("total_results", &total_results);
        ctx.insert("bookmarks", bookmarks);
        ctx.insert("version", "0.0.0");
        ctx.insert("external_results", &Vec::<ResultEntry>::new());
        ctx
    }

    /// Shorthand for an external result without a description.
    fn entry(title: &'static str, url: &'static str, engine: &'static str) -> ResultEntry {
        ResultEntry {
            title: title.into(),
            url: url.into(),
            description: None,
            engine: engine.into(),
        }
    }

    /// Asserts that `entries` carries exactly `expected` titles, in order.
    fn assert_titles(entries: &[ResultEntry], expected: &[&str]) {
        assert_eq!(entries.len(), expected.len());
        for (got, want) in entries.iter().zip(expected) {
            assert_eq!(got.title, *want);
        }
    }

    #[test]
    fn render_template_no_query() {
        let mut ctx = tera::Context::new();
        ctx.insert("version", "0.0.0");
        let html = render_index(&ctx);
        assert!(html.contains("Bookmark Search"));
        assert!(html.contains("enter a query"));
    }

    #[test]
    fn render_template_with_results() {
        let stored = [Bookmark {
            id: 1,
            title: "Rust Lang".into(),
            url: "https://rust-lang.org".into(),
            description: Some("The Rust programming language".into()),
            source: "bookmark".into(),
            content: None,
            tags: None,
            created_at: Utc::now(),
        }];
        let html = render_index(&listing_context("rust", 1, 1, &stored));
        assert!(html.contains("Rust Lang"));
        assert!(html.contains("rust-lang.org"));
        assert!(html.contains("1 result"));
    }

    #[test]
    fn render_template_no_results() {
        let html = render_index(&listing_context("zzznotfound", 0, 0, &[]));
        assert!(html.contains("no bookmarks found"));
    }

    #[test]
    fn test_interleave_empty() {
        assert!(interleave(&[]).is_empty());
    }

    #[test]
    fn test_interleave_single_engine() {
        let only = vec![entry("A", "http://a", "e1"), entry("B", "http://b", "e1")];
        assert_titles(&interleave(&[only]), &["A", "B"]);
    }

    #[test]
    fn test_interleave_two_engines_equal_length() {
        let first = vec![
            entry("A1", "http://a1", "e1"),
            entry("A2", "http://a2", "e1"),
        ];
        let second = vec![
            entry("B1", "http://b1", "e2"),
            entry("B2", "http://b2", "e2"),
        ];
        assert_titles(&interleave(&[first, second]), &["A1", "B1", "A2", "B2"]);
    }

    #[test]
    fn test_interleave_uneven_length() {
        let long = vec![
            entry("A1", "http://a1", "e1"),
            entry("A2", "http://a2", "e1"),
            entry("A3", "http://a3", "e1"),
        ];
        let short = vec![entry("B1", "http://b1", "e2")];
        assert_titles(&interleave(&[long, short]), &["A1", "B1", "A2", "A3"]);
    }
}