//! Import an existing Django status SQLite database into the Rust schema.
//!
//! Reads `properties_property` and `properties_check` from the Django DB
//! and writes them into the new schema, preserving Property UUIDs so
//! existing public status URLs keep working.
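//!
//! A minimal invocation sketch (the source path is illustrative and an async
//! runtime such as tokio is assumed):
//!
//! ```ignore
//! run(PathBuf::from("legacy/db.sqlite3"), /* force */ false).await?;
//! ```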
use anyhow::{Context, Result};
use chrono::TimeZone;
use sqlx::sqlite::SqliteConnectOptions;
use sqlx::{Connection, SqliteConnection};
use std::path::PathBuf;
pub async fn run(source: PathBuf, force: bool) -> Result<()> {
if !source.exists() {
anyhow::bail!("source DB not found: {}", source.display());
}
let data_dir = std::env::var("STATUS_DATA_DIR")
.map(PathBuf::from)
.unwrap_or_else(|_| PathBuf::from("data"));
std::fs::create_dir_all(&data_dir)?;
let dest_path = data_dir.join("db.sqlite3");
if dest_path.exists() && !force {
anyhow::bail!(
"{} already exists. Pass --force to wipe it first.",
dest_path.display()
);
}
if dest_path.exists() {
std::fs::remove_file(&dest_path).context("removing existing dest db")?;
}
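    // The destination is now gone (or never existed); let the app's own
    // init path create a fresh db.sqlite3 with the new schema.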
let pool = crate::db::init(&data_dir).await?;
    // Open the source read-only via typed connect options rather than a URL
    // string, so paths containing spaces or `?` are handled correctly.
    let opts = SqliteConnectOptions::new()
        .filename(&source)
        .create_if_missing(false)
        .read_only(true);
    let mut src = SqliteConnection::connect_with(&opts)
        .await
        .context("opening source db")?;
// ---------- properties ----------
// Django columns: id (UUID hex string), url, is_public, last_run_at,
// next_run_at, last_run_at_crawler, next_run_at_crawler, crawler_insights
// (JSON or NULL), crawl_state, crawl_started_at, last_crawl_success_at,
// last_crawl_error, last_crawl_duration_ms, last_crawl_pages_count,
// lighthouse_scores, lighthouse_details, last_lighthouse_run_at,
// last_lighthouse_success_at, last_lighthouse_error,
// last_lighthouse_duration_ms, next_lighthouse_run_at, lighthouse_state,
// lighthouse_started_at, last_alert_sent, alert_state, created_at,
// updated_at, user_id.
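    // Note: `user_id` is not carried over (the SELECT below omits it), and
    // the new `is_protected` flag is hardcoded to 0 in the INSERT.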
let rows: Vec<DjangoProperty> = sqlx::query_as::<_, DjangoProperty>(
"SELECT id, url, is_public, \
last_run_at, next_run_at, \
last_run_at_crawler, next_run_at_crawler, crawler_insights, \
crawl_state, crawl_started_at, last_crawl_success_at, last_crawl_error, \
last_crawl_duration_ms, last_crawl_pages_count, \
lighthouse_scores, lighthouse_details, last_lighthouse_run_at, \
last_lighthouse_success_at, last_lighthouse_error, last_lighthouse_duration_ms, \
next_lighthouse_run_at, lighthouse_state, lighthouse_started_at, \
last_alert_sent, alert_state, created_at, updated_at \
FROM properties_property",
)
.fetch_all(&mut src)
.await
.context("reading properties from source db")?;
let mut prop_count = 0;
for row in &rows {
        // `Uuid::parse_str` accepts both the hyphenated form and Django's
        // bare 32-hex "simple" form, so no manual re-hyphenation (which
        // could panic on short ids) is needed.
        let uuid = uuid::Uuid::parse_str(&row.id)
            .with_context(|| format!("parsing uuid: {}", row.id))?;
let blob = uuid.as_bytes().to_vec();
sqlx::query(
r#"INSERT INTO properties (
id, url, is_public, is_protected,
last_run_at, next_run_at,
last_run_at_crawler, next_run_at_crawler, crawler_insights,
crawl_state, crawl_started_at, last_crawl_success_at, last_crawl_error,
last_crawl_duration_ms, last_crawl_pages_count,
lighthouse_scores, lighthouse_details, last_lighthouse_run_at,
last_lighthouse_success_at, last_lighthouse_error, last_lighthouse_duration_ms,
next_lighthouse_run_at, lighthouse_state, lighthouse_started_at,
alert_state, last_alert_sent, created_at, updated_at
) VALUES (?, ?, ?, 0, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"#,
)
.bind(&blob)
.bind(&row.url)
.bind(row.is_public)
.bind(parse_django_dt(&row.last_run_at))
.bind(parse_django_dt(&row.next_run_at))
.bind(parse_django_dt(&row.last_run_at_crawler))
.bind(parse_django_dt(&row.next_run_at_crawler))
.bind(&row.crawler_insights)
.bind(&row.crawl_state)
.bind(parse_django_dt(&row.crawl_started_at))
.bind(parse_django_dt(&row.last_crawl_success_at))
.bind(&row.last_crawl_error)
.bind(row.last_crawl_duration_ms)
.bind(row.last_crawl_pages_count)
.bind(&row.lighthouse_scores)
.bind(&row.lighthouse_details)
.bind(parse_django_dt(&row.last_lighthouse_run_at))
.bind(parse_django_dt(&row.last_lighthouse_success_at))
.bind(&row.last_lighthouse_error)
.bind(row.last_lighthouse_duration_ms)
.bind(parse_django_dt(&row.next_lighthouse_run_at))
.bind(&row.lighthouse_state)
.bind(parse_django_dt(&row.lighthouse_started_at))
.bind(&row.alert_state)
.bind(parse_django_dt(&row.last_alert_sent))
.bind(parse_django_dt(&row.created_at).unwrap_or(0))
.bind(parse_django_dt(&row.updated_at).unwrap_or(0))
.execute(&pool)
.await
.with_context(|| format!("inserting property {}", row.url))?;
prop_count += 1;
}
// ---------- checks ----------
let checks: Vec<DjangoCheck> = sqlx::query_as::<_, DjangoCheck>(
"SELECT property_id, status_code, response_time, headers, created_at FROM properties_check",
)
.fetch_all(&mut src)
.await
.context("reading checks from source db")?;
let mut check_count = 0;
for row in &checks {
let uuid = match uuid::Uuid::parse_str(&row.property_id) {
Ok(u) => u,
Err(_) => continue,
};
let blob = uuid.as_bytes().to_vec();
        let created = parse_django_dt(&row.created_at).unwrap_or(0);
        // An insert can fail (e.g. a FOREIGN KEY violation when the parent
        // property was skipped above); don't abort the whole import, but
        // only count rows that actually landed.
        let res = sqlx::query(
            "INSERT INTO checks (property_id, status_code, response_ms, headers, created_at) \
             VALUES (?, ?, ?, ?, ?)",
        )
        .bind(&blob)
        .bind(row.status_code)
        .bind(row.response_time)
        .bind(&row.headers)
        .bind(created)
        .execute(&pool)
        .await;
        if res.is_ok() {
            check_count += 1;
        }
    }
println!(
"[migrate] imported {prop_count} properties, {check_count} checks into {}",
dest_path.display()
);
let _ = src.close().await;
Ok(())
}
#[derive(sqlx::FromRow)]
struct DjangoProperty {
id: String,
url: String,
is_public: i64,
last_run_at: Option<String>,
next_run_at: Option<String>,
last_run_at_crawler: Option<String>,
next_run_at_crawler: Option<String>,
crawler_insights: Option<String>,
crawl_state: String,
crawl_started_at: Option<String>,
last_crawl_success_at: Option<String>,
last_crawl_error: Option<String>,
last_crawl_duration_ms: Option<i64>,
last_crawl_pages_count: Option<i64>,
lighthouse_scores: Option<String>,
lighthouse_details: Option<String>,
last_lighthouse_run_at: Option<String>,
last_lighthouse_success_at: Option<String>,
last_lighthouse_error: Option<String>,
last_lighthouse_duration_ms: Option<i64>,
next_lighthouse_run_at: Option<String>,
lighthouse_state: String,
lighthouse_started_at: Option<String>,
last_alert_sent: Option<String>,
alert_state: String,
created_at: Option<String>,
updated_at: Option<String>,
}
#[derive(sqlx::FromRow)]
struct DjangoCheck {
property_id: String,
status_code: i64,
// Django schema declared `INTEGER NOT NULL`; the Python ORM stored
// milliseconds as ints. (An earlier draft tried `f64` and crashed on
// strict-typed sqlx decoding.)
response_time: i64,
headers: String,
created_at: Option<String>,
}
/// Django stores datetimes as ISO 8601 strings (`YYYY-MM-DD HH:MM:SS[.ffffff]`,
/// occasionally with a `T` separator). Parse to UTC milliseconds since the
/// epoch; return `None` if the value is absent, empty, or unparseable.
fn parse_django_dt(s: &Option<String>) -> Option<i64> {
let raw = s.as_deref()?.trim();
if raw.is_empty() {
return None;
}
let normalized = raw.replace('T', " ");
let parsed = chrono::NaiveDateTime::parse_from_str(&normalized, "%Y-%m-%d %H:%M:%S%.f")
.or_else(|_| chrono::NaiveDateTime::parse_from_str(&normalized, "%Y-%m-%d %H:%M:%S"))
.ok()?;
Some(chrono::Utc.from_utc_datetime(&parsed).timestamp_millis())
}
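
// Spot checks for the Django datetime parser. The expected values are
// derived from the Unix epoch for 2024-01-01T00:00:00Z (1_704_067_200 s).
#[cfg(test)]
mod tests {
    use super::parse_django_dt;

    #[test]
    fn parses_space_and_t_separated_datetimes() {
        // 2024-01-02 03:04:05.678 UTC -> 1_704_164_645_678 ms.
        let with_fraction = Some("2024-01-02 03:04:05.678".to_string());
        assert_eq!(parse_django_dt(&with_fraction), Some(1_704_164_645_678));

        // The 'T' separator is normalized to a space before parsing.
        let with_t = Some("2024-01-02T03:04:05".to_string());
        assert_eq!(parse_django_dt(&with_t), Some(1_704_164_645_000));
    }

    #[test]
    fn returns_none_for_missing_or_malformed_input() {
        assert_eq!(parse_django_dt(&None), None);
        assert_eq!(parse_django_dt(&Some(String::new())), None);
        assert_eq!(parse_django_dt(&Some("  ".to_string())), None);
        assert_eq!(parse_django_dt(&Some("not a date".to_string())), None);
    }
}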