search_hub

at 8f8b2d8 Raw

use crate::importer::{open_backup_file, Importer};
use crate::models::Bookmark;
use chrono::{DateTime, TimeZone, Utc};
use serde::Deserialize;
use std::path::{Path, PathBuf};

/// Chrome/Chromium importer.
///
/// Bookmarks are parsed from the `Bookmarks` JSON file found in a browser
/// profile directory; history is read from the `History` database in the
/// same directory.
///
/// Profiles are discovered under `~/.config/google-chrome/` and
/// `~/.config/chromium/`. A sub-directory counts as a profile when it is
/// named `Default` or starts with `Profile ` and contains a `Bookmarks` file.
pub struct ChromeImporter;

impl Importer for ChromeImporter {
    /// Human-readable name of this importer.
    fn name(&self) -> &'static str {
        "Chrome/Chromium"
    }

    /// Returns every Chrome/Chromium profile directory that holds a
    /// `Bookmarks` file.
    ///
    /// Looks inside `<home>/.config/google-chrome` and
    /// `<home>/.config/chromium` for directories named `Default` or starting
    /// with `Profile `. Directories and entries that cannot be read are
    /// skipped silently; when the home directory is unknown the result is
    /// empty.
    fn discover_profiles(&self) -> Vec<PathBuf> {
        let Some(home) = home_dir() else {
            return Vec::new();
        };

        let mut profiles = Vec::new();
        for base in ["google-chrome", "chromium"] {
            // A browser that is not installed simply has no config directory.
            let Ok(entries) = std::fs::read_dir(home.join(".config").join(base)) else {
                continue;
            };
            for path in entries.flatten().map(|entry| entry.path()) {
                let is_profile_name = path
                    .file_name()
                    .and_then(|n| n.to_str())
                    .is_some_and(|n| n == "Default" || n.starts_with("Profile "));
                if path.is_dir() && is_profile_name && path.join("Bookmarks").exists() {
                    profiles.push(path);
                }
            }
        }
        profiles
    }

    /// Imports all bookmarks from the `Bookmarks` file in `profile_path`.
    ///
    /// The three roots (`bookmark_bar`, `other`, `synced`) are walked in that
    /// order and flattened into one list. Ids are assigned sequentially
    /// starting at 1 and are unique across all roots.
    ///
    /// # Errors
    ///
    /// Fails when the `Bookmarks` file is missing, cannot be read, or is not
    /// valid bookmark JSON.
    fn import(&self, profile_path: &Path) -> anyhow::Result<Vec<Bookmark>> {
        let bookmarks_path = profile_path.join("Bookmarks");
        if !bookmarks_path.exists() {
            anyhow::bail!("Bookmarks file not found in {profile_path:?}");
        }

        let content = std::fs::read_to_string(&bookmarks_path)?;
        let root: ChromeBookmarks = serde_json::from_str(&content)?;

        let mut results = Vec::new();
        // Thread the id counter through every root; restarting it per root
        // would hand out the same ids to bookmarks in different roots.
        let mut last_id = 0;
        for folder in [
            &root.roots.bookmark_bar,
            &root.roots.other,
            &root.roots.synced,
        ] {
            last_id = collect_children(&folder.children, &mut results, last_id);
        }

        Ok(results)
    }

    /// Imports browsing history from the `History` database in
    /// `profile_path`.
    ///
    /// Every non-hidden row of the `urls` table is returned, most recently
    /// visited first. `created_at` is derived from `last_visit_time`; when
    /// that value cannot be converted the Unix epoch is used instead.
    ///
    /// # Errors
    ///
    /// Fails when the database cannot be opened via `open_backup_file`, the
    /// query cannot be prepared, or a row cannot be decoded.
    fn import_history(&self, profile_path: &Path) -> anyhow::Result<Vec<Bookmark>> {
        // NOTE(review): `open_backup_file` presumably works on a copy so the
        // database locked by a running browser is not touched — confirm.
        let conn = open_backup_file(profile_path, "History")?;

        let mut stmt = conn.prepare(
            "SELECT id, url, COALESCE(title, ''), last_visit_time
             FROM urls
             WHERE hidden = 0
             ORDER BY last_visit_time DESC",
        )?;

        let entries = stmt
            .query_map([], |row| {
                let id: i32 = row.get(0)?;
                let url: String = row.get(1)?;
                let title: String = row.get(2)?;
                let chrome_time: i64 = row.get(3)?;

                let created_at = chrome_time_i64_to_datetime(chrome_time)
                    .unwrap_or_else(|| DateTime::from_timestamp(0, 0).unwrap());

                Ok(Bookmark {
                    id,
                    title,
                    url,
                    description: None,
                    source: "history".into(),
                    content: None,
                    tags: None,
                    created_at,
                })
            })?
            .collect::<Result<Vec<_>, _>>()?;

        Ok(entries)
    }
}

/// Top-level object of a Chrome `Bookmarks` JSON file.
///
/// Only the `roots` key is read; any other top-level keys are ignored
/// during deserialization.
#[derive(Deserialize)]
struct ChromeBookmarks {
    roots: ChromeRoots,
}

/// The root folders stored under the `roots` key.
///
/// All three keys are required: deserialization fails if any of
/// `bookmark_bar`, `other` or `synced` is absent.
#[derive(Deserialize)]
struct ChromeRoots {
    bookmark_bar: ChromeFolder,
    other: ChromeFolder,
    synced: ChromeFolder,
}

/// A root folder; only its direct `children` list is read.
///
/// The `children` key is required (it may be an empty array).
#[derive(Deserialize)]
struct ChromeFolder {
    children: Vec<ChromeNode>,
}

/// A single entry of the bookmark tree: either a bookmark or a folder.
#[derive(Deserialize)]
struct ChromeNode {
    /// Value of the JSON `type` key; this module handles `"url"` and
    /// `"folder"` and ignores anything else.
    #[serde(rename = "type")]
    node_type: String,
    /// Display name; used as the bookmark title.
    name: String,
    /// Target address; may be absent (e.g. on folder nodes).
    url: Option<String>,
    /// Nested entries; defaults to an empty list when the key is missing.
    #[serde(default)]
    children: Vec<ChromeNode>,
    /// Creation time as a decimal string of microseconds since 1601-01-01.
    date_added: Option<String>,
}

fn chrome_time_to_datetime(chrome_time: &str) -> Option<DateTime<Utc>> {
    let micros: i64 = chrome_time.parse().ok()?;
    chrome_time_i64_to_datetime(micros)
}

/// Converts a Chrome timestamp (microseconds since 1601-01-01) into a UTC
/// date-time with whole-second precision.
///
/// The sub-second part is discarded. Returns `None` when the epoch shift
/// overflows or the result is outside the range chrono can represent.
fn chrome_time_i64_to_datetime(micros: i64) -> Option<DateTime<Utc>> {
    const MICROS_PER_SECOND: i64 = 1_000_000;
    // Seconds between the Windows epoch (1601-01-01) and the Unix epoch
    // (1970-01-01).
    const WINDOWS_TO_UNIX_EPOCH_SECS: i64 = 11_644_473_600;

    let secs_since_1601 = micros / MICROS_PER_SECOND;
    match secs_since_1601.checked_sub(WINDOWS_TO_UNIX_EPOCH_SECS) {
        Some(unix_secs) => Utc.timestamp_opt(unix_secs, 0).single(),
        None => None,
    }
}

fn collect_children(nodes: &[ChromeNode], out: &mut Vec<Bookmark>, next_id: i32) -> i32 {
    let mut id = next_id;
    for node in nodes {
        if node.node_type == "url" {
            if let Some(ref url) = node.url {
                id += 1;
                let created_at = node
                    .date_added
                    .as_ref()
                    .and_then(|d| chrome_time_to_datetime(d))
                    .unwrap_or_else(|| {
                        DateTime::from_timestamp(0, 0).unwrap()
                    });
                out.push(Bookmark {
                    id,
                    title: node.name.clone(),
                    url: url.clone(),
                    description: None,
                    source: "bookmark".into(),
                    content: None,
                    tags: None,
                    created_at,
                });
            }
        } else if node.node_type == "folder" {
            id = collect_children(&node.children, out, id);
        }
    }
    id
}

/// Returns the current user's home directory as found in the environment.
///
/// `HOME` is consulted first; on Windows builds `USERPROFILE` serves as a
/// fallback when `HOME` is not set. The variables are read with
/// `std::env::var_os`, so a home path that is not valid Unicode is still
/// returned rather than being treated as missing.
///
/// Returns `None` when no applicable variable is set.
fn home_dir() -> Option<PathBuf> {
    std::env::var_os("HOME")
        .or_else(|| {
            if cfg!(target_os = "windows") {
                std::env::var_os("USERPROFILE")
            } else {
                None
            }
        })
        .map(PathBuf::from)
}

/// Unit tests for bookmark parsing, timestamp conversion and file import.
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;

    /// Fixture: a `Bookmarks` document with two entries on the bookmark bar
    /// (one bookmark and one folder holding a nested bookmark), one bookmark
    /// under `other`, and an empty `synced` root.
    fn chrome_bookmarks_json() -> String {
        r#"{
            "checksum": "abc",
            "roots": {
                "bookmark_bar": {
                    "children": [
                        {
                            "date_added": "13249872340500000",
                            "guid": "a",
                            "id": "1",
                            "name": "Rust Lang",
                            "type": "url",
                            "url": "https://rust-lang.org"
                        },
                        {
                            "date_added": "13249872341500000",
                            "guid": "b",
                            "id": "2",
                            "name": "My Folder",
                            "type": "folder",
                            "children": [
                                {
                                    "date_added": "13249872342500000",
                                    "guid": "c",
                                    "id": "3",
                                    "name": "Nested Bookmark",
                                    "type": "url",
                                    "url": "https://example.com/nested"
                                }
                            ]
                        }
                    ],
                    "date_added": "13249872340000000",
                    "date_modified": "13249872340000000",
                    "guid": "root",
                    "id": "0",
                    "name": "Bookmarks Bar",
                    "type": "folder"
                },
                "other": {
                    "children": [
                        {
                            "date_added": "13249872343500000",
                            "guid": "d",
                            "id": "4",
                            "name": "Other Bookmark",
                            "type": "url",
                            "url": "https://other.com"
                        }
                    ],
                    "date_added": "13249872340000000",
                    "date_modified": "13249872340000000",
                    "guid": "other",
                    "id": "0",
                    "name": "Other Bookmarks",
                    "type": "folder"
                },
                "synced": {
                    "children": [],
                    "date_added": "13249872340000000",
                    "date_modified": "13249872340000000",
                    "guid": "synced",
                    "id": "0",
                    "name": "Synced Bookmarks",
                    "type": "folder"
                }
            },
            "version": 1
        }"#.to_string()
    }

    /// The fixture deserializes and each root exposes the expected number of
    /// direct children; keys the structs do not declare are ignored.
    #[test]
    fn parse_chrome_bookmarks_json() {
        let root: ChromeBookmarks = serde_json::from_str(&chrome_bookmarks_json()).expect("parse");
        assert_eq!(root.roots.bookmark_bar.children.len(), 2);
        assert_eq!(root.roots.other.children.len(), 1);
        assert!(root.roots.synced.children.is_empty());
    }

    /// A known Chrome timestamp maps to the expected UTC instant.
    #[test]
    fn chrome_time_conversion() {
        let dt = chrome_time_to_datetime("13249872340500000").expect("convert");
        // 13249872340500000 micros since 1601-01-01 = 2020-11-15T00:05:40Z
        // (the trailing half second is truncated by the conversion).
        let expected = Utc.with_ymd_and_hms(2020, 11, 15, 0, 5, 40).unwrap();
        assert_eq!(dt, expected);
    }

    /// Importing from a profile directory flattens folders and keeps
    /// document order across the roots.
    #[test]
    fn import_from_bookmarks_file() -> anyhow::Result<()> {
        let dir = tempfile::tempdir()?;
        let path = dir.path().join("Bookmarks");
        let mut f = std::fs::File::create(&path)?;
        f.write_all(chrome_bookmarks_json().as_bytes())?;

        let importer = ChromeImporter;
        let bookmarks = importer.import(dir.path())?;

        assert_eq!(bookmarks.len(), 3);
        assert_eq!(bookmarks[0].title, "Rust Lang");
        assert_eq!(bookmarks[0].url, "https://rust-lang.org");
        assert_eq!(bookmarks[1].title, "Nested Bookmark");
        assert_eq!(bookmarks[1].url, "https://example.com/nested");
        assert_eq!(bookmarks[2].title, "Other Bookmark");
        assert_eq!(bookmarks[2].url, "https://other.com");

        Ok(())
    }

    /// Importing from a directory without a `Bookmarks` file is an error.
    #[test]
    fn error_on_missing_bookmarks_file() {
        let dir = tempfile::tempdir().unwrap();
        let importer = ChromeImporter;
        let result = importer.import(dir.path());
        assert!(result.is_err());
    }
}