//! Import an existing Django status SQLite into the rust schema.
//!
//! Reads `properties_property` and `properties_check` from the Django DB
//! and writes them into the new schema, preserving Property UUIDs so
//! existing public status URLs keep working.
use anyhow::{Context, Result};
use sqlx::sqlite::SqliteConnectOptions;
use sqlx::{Connection, SqliteConnection};
use std::path::Path;
use std::path::PathBuf;
use std::str::FromStr;

/// Import an existing Django status SQLite database into the rust schema.
///
/// Reads `properties_property` and `properties_check` from `source` and
/// writes them into a fresh DB at `$STATUS_DATA_DIR/db.sqlite3` (default
/// `data/`), preserving Property UUIDs so existing public status URLs keep
/// working.
///
/// # Errors
/// Fails if `source` does not exist, if the destination DB already exists
/// and `force` is false, or on any property read/write error. Check rows
/// whose property UUID is unparseable (or whose insert fails) are skipped
/// and reported, not fatal.
pub async fn run(source: PathBuf, force: bool) -> Result<()> {
    if !source.exists() {
        anyhow::bail!("source DB not found: {}", source.display());
    }

    // Destination directory: $STATUS_DATA_DIR, falling back to ./data.
    let data_dir = std::env::var("STATUS_DATA_DIR")
        .map(PathBuf::from)
        .unwrap_or_else(|_| PathBuf::from("data"));
    std::fs::create_dir_all(&data_dir)?;
    let dest_path = data_dir.join("db.sqlite3");

    if dest_path.exists() && !force {
        anyhow::bail!(
            "{} already exists. Pass --force to wipe it first.",
            dest_path.display()
        );
    }
    if dest_path.exists() {
        std::fs::remove_file(&dest_path).context("removing existing dest db")?;
    }

    let pool = crate::db::init(&data_dir).await?;

    // Build the connect options from the path directly instead of formatting
    // a `sqlite://…?mode=ro` URL: a path containing `?`, `#`, or spaces would
    // corrupt the URL form (and read-only was being specified twice — once in
    // the URL, once via `.read_only(true)`). Read-only guarantees the source
    // is never modified.
    let opts = SqliteConnectOptions::new()
        .filename(&source)
        .create_if_missing(false)
        .read_only(true);
    let mut src = SqliteConnection::connect_with(&opts)
        .await
        .context("opening source db")?;

    // ---------- properties ----------
    // Django columns (see `DjangoProperty` for the row mapping): `id` is a
    // UUID stored as text, datetimes are ISO8601 strings, JSON columns
    // (crawler_insights, lighthouse_scores, lighthouse_details) are raw text
    // or NULL. `user_id` exists in the source table but is not migrated.
    let rows: Vec<DjangoProperty> = sqlx::query_as::<_, DjangoProperty>(
        "SELECT id, url, is_public, \
                last_run_at, next_run_at, \
                last_run_at_crawler, next_run_at_crawler, crawler_insights, \
                crawl_state, crawl_started_at, last_crawl_success_at, last_crawl_error, \
                last_crawl_duration_ms, last_crawl_pages_count, \
                lighthouse_scores, lighthouse_details, last_lighthouse_run_at, \
                last_lighthouse_success_at, last_lighthouse_error, last_lighthouse_duration_ms, \
                next_lighthouse_run_at, lighthouse_state, lighthouse_started_at, \
                last_alert_sent, alert_state, created_at, updated_at \
         FROM properties_property",
    )
    .fetch_all(&mut src)
    .await
    .context("reading properties from source db")?;

    let mut prop_count = 0;
    for row in &rows {
        // `Uuid::parse_str` accepts both the hyphenated and the plain
        // 32-hex-digit ("simple") form, so the old manual re-hyphenation
        // fallback — which could panic via byte-range slicing on ids
        // shorter than 20 bytes — is unnecessary.
        let uuid = uuid::Uuid::parse_str(&row.id)
            .with_context(|| format!("parsing uuid: {}", row.id))?;
        // The new schema stores UUIDs as 16-byte blobs.
        let blob = uuid.as_bytes().to_vec();
        // `is_protected` is new in the rust schema; imported rows default to 0.
        sqlx::query(
            r#"INSERT INTO properties (
                id, url, is_public, is_protected,
                last_run_at, next_run_at,
                last_run_at_crawler, next_run_at_crawler, crawler_insights,
                crawl_state, crawl_started_at, last_crawl_success_at, last_crawl_error,
                last_crawl_duration_ms, last_crawl_pages_count,
                lighthouse_scores, lighthouse_details, last_lighthouse_run_at,
                last_lighthouse_success_at, last_lighthouse_error, last_lighthouse_duration_ms,
                next_lighthouse_run_at, lighthouse_state, lighthouse_started_at,
                alert_state, last_alert_sent, created_at, updated_at
            ) VALUES (?, ?, ?, 0, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"#,
        )
        .bind(&blob)
        .bind(&row.url)
        .bind(row.is_public)
        .bind(parse_django_dt(&row.last_run_at))
        .bind(parse_django_dt(&row.next_run_at))
        .bind(parse_django_dt(&row.last_run_at_crawler))
        .bind(parse_django_dt(&row.next_run_at_crawler))
        .bind(&row.crawler_insights)
        .bind(&row.crawl_state)
        .bind(parse_django_dt(&row.crawl_started_at))
        .bind(parse_django_dt(&row.last_crawl_success_at))
        .bind(&row.last_crawl_error)
        .bind(row.last_crawl_duration_ms)
        .bind(row.last_crawl_pages_count)
        .bind(&row.lighthouse_scores)
        .bind(&row.lighthouse_details)
        .bind(parse_django_dt(&row.last_lighthouse_run_at))
        .bind(parse_django_dt(&row.last_lighthouse_success_at))
        .bind(&row.last_lighthouse_error)
        .bind(row.last_lighthouse_duration_ms)
        .bind(parse_django_dt(&row.next_lighthouse_run_at))
        .bind(&row.lighthouse_state)
        .bind(parse_django_dt(&row.lighthouse_started_at))
        .bind(&row.alert_state)
        .bind(parse_django_dt(&row.last_alert_sent))
        // created_at/updated_at are NOT NULL in the new schema; missing or
        // unparseable source values become epoch 0.
        .bind(parse_django_dt(&row.created_at).unwrap_or(0))
        .bind(parse_django_dt(&row.updated_at).unwrap_or(0))
        .execute(&pool)
        .await
        .with_context(|| format!("inserting property {}", row.url))?;
        prop_count += 1;
    }

    // ---------- checks ----------
    let checks: Vec<DjangoCheck> = sqlx::query_as::<_, DjangoCheck>(
        "SELECT property_id, status_code, response_time, headers, created_at FROM properties_check",
    )
    .fetch_all(&mut src)
    .await
    .context("reading checks from source db")?;

    let mut check_count = 0;
    let mut skipped_checks = 0;
    for row in &checks {
        // Best-effort import: a check whose property id can't be parsed is
        // skipped rather than aborting the whole migration.
        let Ok(uuid) = uuid::Uuid::parse_str(&row.property_id) else {
            skipped_checks += 1;
            continue;
        };
        let blob = uuid.as_bytes().to_vec();
        let created = parse_django_dt(&row.created_at).unwrap_or(0);
        let inserted = sqlx::query(
            "INSERT INTO checks (property_id, status_code, response_ms, headers, created_at) \
             VALUES (?, ?, ?, ?, ?)",
        )
        .bind(&blob)
        .bind(row.status_code)
        .bind(row.response_time)
        .bind(&row.headers)
        .bind(created)
        .execute(&pool)
        .await;
        // Only count rows that actually landed: the old code incremented the
        // counter even when the (discarded) insert result was an error, so
        // the final summary could over-report.
        match inserted {
            Ok(_) => check_count += 1,
            Err(_) => skipped_checks += 1,
        }
    }

    println!(
        "[migrate] imported {prop_count} properties, {check_count} checks into {}",
        dest_path.display()
    );
    if skipped_checks > 0 {
        println!("[migrate] skipped {skipped_checks} checks (bad uuid or insert error)");
    }
    let _ = src.close().await;
    Ok(())
}

/// Row mapping for the columns `run` selects from Django's
/// `properties_property` table.
///
/// All datetimes arrive as optional ISO8601 text (decoded via
/// `parse_django_dt`); `crawler_insights`, `lighthouse_scores` and
/// `lighthouse_details` are carried over as raw text (JSON or NULL,
/// per the column notes in `run`).
#[derive(sqlx::FromRow)]
struct DjangoProperty {
    // UUID as text; Django may store it with or without hyphens.
    id: String,
    url: String,
    // SQLite has no native bool; Django booleans come back as 0/1 integers.
    is_public: i64,
    last_run_at: Option<String>,
    next_run_at: Option<String>,
    last_run_at_crawler: Option<String>,
    next_run_at_crawler: Option<String>,
    // JSON payload or NULL; copied verbatim into the new schema.
    crawler_insights: Option<String>,
    crawl_state: String,
    crawl_started_at: Option<String>,
    last_crawl_success_at: Option<String>,
    last_crawl_error: Option<String>,
    last_crawl_duration_ms: Option<i64>,
    last_crawl_pages_count: Option<i64>,
    // JSON payloads or NULL; copied verbatim into the new schema.
    lighthouse_scores: Option<String>,
    lighthouse_details: Option<String>,
    last_lighthouse_run_at: Option<String>,
    last_lighthouse_success_at: Option<String>,
    last_lighthouse_error: Option<String>,
    last_lighthouse_duration_ms: Option<i64>,
    next_lighthouse_run_at: Option<String>,
    lighthouse_state: String,
    lighthouse_started_at: Option<String>,
    last_alert_sent: Option<String>,
    alert_state: String,
    // Nullable here even though the new schema requires them; `run`
    // substitutes 0 (epoch) when absent or unparseable.
    created_at: Option<String>,
    updated_at: Option<String>,
}

/// Row mapping for the columns `run` selects from Django's
/// `properties_check` table. Rows with an unparseable `property_id`
/// are skipped during import.
#[derive(sqlx::FromRow)]
struct DjangoCheck {
    // UUID as text, matching `DjangoProperty::id`.
    property_id: String,
    status_code: i64,
    // Django schema declared `INTEGER NOT NULL`; the Python ORM stored
    // milliseconds as ints. (An earlier draft tried `f64` and crashed on
    // strict-typed sqlx decoding.)
    response_time: i64,
    headers: String,
    // ISO8601 text; decoded via `parse_django_dt`, defaulting to epoch 0.
    created_at: Option<String>,
}

/// Parse a Django-style datetime string to UTC milliseconds since the epoch.
///
/// Accepts, in order of preference:
/// 1. `YYYY-MM-DD HH:MM:SS[.f]±HH:MM` (space or `T` separator) — offset-aware
///    strings as some Django/SQLite exports carry an explicit UTC offset;
/// 2. naive `YYYY-MM-DD HH:MM:SS[.f]` with a space or `T` separator and an
///    optional trailing `Z`, interpreted as UTC.
///
/// Returns `None` if the value is absent, empty, or unparseable.
fn parse_django_dt(s: &Option<String>) -> Option<i64> {
    let raw = s.as_deref()?.trim();
    if raw.is_empty() {
        return None;
    }
    // Offset-aware forms first: the old code rejected strings carrying an
    // explicit `+00:00`-style offset outright.
    for fmt in ["%Y-%m-%d %H:%M:%S%.f%:z", "%Y-%m-%dT%H:%M:%S%.f%:z"] {
        if let Ok(dt) = chrono::DateTime::parse_from_str(raw, fmt) {
            return Some(dt.timestamp_millis());
        }
    }
    // Naive forms: normalize the `T` separator, tolerate a trailing `Z`
    // (which also denotes UTC), and try with/without fractional seconds.
    let normalized = raw.trim_end_matches('Z').replace('T', " ");
    let parsed = chrono::NaiveDateTime::parse_from_str(&normalized, "%Y-%m-%d %H:%M:%S%.f")
        .or_else(|_| chrono::NaiveDateTime::parse_from_str(&normalized, "%Y-%m-%d %H:%M:%S"))
        .ok()?;
    Some(chrono::Utc.from_utc_datetime(&parsed).timestamp_millis())
}

use chrono::TimeZone;

// NOTE(review): no-op; presumably exists only to keep the `std::path::Path`
// import in use — confirm before removing.
#[allow(dead_code)]
fn _unused(_path: &Path) {}