heartwood every commit a ring

Probe HTTP by phase (DNS/TCP/TLS/TTFB) over HTTP/2

3d63b731 by Isaac Bythewood · 1 day ago

modified Cargo.lock
@@ -140,28 +140,6 @@ version = "1.5.0"source = "registry+https://github.com/rust-lang/crates.io-index"checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"[[package]]name = "aws-lc-rs"version = "1.16.3"source = "registry+https://github.com/rust-lang/crates.io-index"checksum = "0ec6fb3fe69024a75fa7e1bfb48aa6cf59706a101658ea01bfd33b2b248a038f"dependencies = [ "aws-lc-sys", "zeroize",][[package]]name = "aws-lc-sys"version = "0.40.0"source = "registry+https://github.com/rust-lang/crates.io-index"checksum = "f50037ee5e1e41e7b8f9d161680a725bd1626cb6f8c7e901f91f942850852fe7"dependencies = [ "cc", "cmake", "dunce", "fs_extra",][[package]]name = "axum"version = "0.8.9"
@@ -384,8 +362,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"checksum = "d16d90359e986641506914ba71350897565610e87ce0ad9e6f28569db3dd5c6d"dependencies = [ "find-msvc-tools", "jobserver", "libc", "shlex",]
@@ -470,15 +446,6 @@ dependencies = [ "serde",][[package]]name = "cmake"version = "0.1.58"source = "registry+https://github.com/rust-lang/crates.io-index"checksum = "c0f78a02292a74a88ac736019ab962ece0bc380e3f977bf72e376c5d78ff0678"dependencies = [ "cc",][[package]]name = "cobs"version = "0.3.0"
@@ -802,12 +769,6 @@ dependencies = [ "dtoa",][[package]]name = "dunce"version = "1.0.5"source = "registry+https://github.com/rust-lang/crates.io-index"checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813"[[package]]name = "ecow"version = "0.2.6"
@@ -1067,12 +1028,6 @@ dependencies = [ "percent-encoding",][[package]]name = "fs_extra"version = "1.3.0"source = "registry+https://github.com/rust-lang/crates.io-index"checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"[[package]]name = "futf"version = "0.1.5"
@@ -1260,6 +1215,25 @@ version = "0.1.2"source = "registry+https://github.com/rust-lang/crates.io-index"checksum = "f2e102e6eb644d3e0b186fc161e4460417880a0a0b87d235f2e5b8fb30f2e9e0"[[package]]name = "h2"version = "0.4.14"source = "registry+https://github.com/rust-lang/crates.io-index"checksum = "171fefbc92fe4a4de27e0698d6a5b392d6a0e333506bc49133760b3bcf948733"dependencies = [ "atomic-waker", "bytes", "fnv", "futures-core", "futures-sink", "http", "indexmap", "slab", "tokio", "tokio-util", "tracing",][[package]]name = "half"version = "2.7.1"
@@ -1583,7 +1557,7 @@ dependencies = [ "tokio", "tokio-rustls", "tower-service", "webpki-roots", "webpki-roots 1.0.7",][[package]]
@@ -1979,16 +1953,6 @@ version = "1.0.18"source = "registry+https://github.com/rust-lang/crates.io-index"checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682"[[package]]name = "jobserver"version = "0.1.34"source = "registry+https://github.com/rust-lang/crates.io-index"checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33"dependencies = [ "getrandom 0.3.4", "libc",][[package]]name = "js-sys"version = "0.3.98"
@@ -2118,7 +2082,7 @@ dependencies = [ "tokio", "tokio-rustls", "url", "webpki-roots", "webpki-roots 1.0.7",][[package]]
@@ -3143,7 +3107,7 @@ dependencies = [ "wasm-bindgen-futures", "wasm-streams", "web-sys", "webpki-roots", "webpki-roots 1.0.7",][[package]]
@@ -3265,7 +3229,6 @@ version = "0.23.40"source = "registry+https://github.com/rust-lang/crates.io-index"checksum = "ef86cd5876211988985292b91c96a8f2d298df24e75989a43a3c73f2d4d8168b"dependencies = [ "aws-lc-rs", "log", "once_cell", "ring",
@@ -3291,7 +3254,6 @@ version = "0.103.13"source = "registry+https://github.com/rust-lang/crates.io-index"checksum = "61c429a8649f110dddef65e2a5ad240f747e85f7758a6bccc7e5777bd33f756e"dependencies = [ "aws-lc-rs", "ring", "rustls-pki-types", "untrusted",
@@ -3852,9 +3814,11 @@ dependencies = [ "chrono", "dotenvy", "futures-util", "h2", "hickory-resolver", "hmac", "html5ever", "http", "lettre", "mime_guess", "minijinja",
@@ -3872,6 +3836,7 @@ dependencies = [ "tempfile", "thiserror 2.0.18", "tokio", "tokio-rustls", "tower", "tower-cookies", "tower-http",
@@ -3883,6 +3848,7 @@ dependencies = [ "url", "urlencoding", "uuid", "webpki-roots 0.26.11",][[package]]
@@ -5240,6 +5206,15 @@ dependencies = [ "wasm-bindgen",][[package]]name = "webpki-roots"version = "0.26.11"source = "registry+https://github.com/rust-lang/crates.io-index"checksum = "521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9"dependencies = [ "webpki-roots 1.0.7",][[package]]name = "webpki-roots"version = "1.0.7"
modified Cargo.toml
@@ -26,7 +26,14 @@ thiserror = "2"tracing = "0.1"tracing-subscriber = { version = "0.3", features = ["env-filter"] }reqwest = { version = "0.12", default-features = false, features = ["rustls-tls", "stream", "gzip", "brotli"] }rustls = "0.23"# Pin to ring (matching tokio-rustls below) so we don't drag aws-lc-sys's# C build into the deps tree just for a feature flag default. The phased# prober installs ring as the process-wide CryptoProvider in main.rs.rustls = { version = "0.23", default-features = false, features = ["ring", "logging", "std", "tls12"] }tokio-rustls = { version = "0.26", default-features = false, features = ["ring"] }webpki-roots = "0.26"h2 = "0.4"http = "1"tempfile = "3"mime_guess = "2"urlencoding = "2"
modified frontend/static_src/properties/scripts/property_graphs.js
@@ -57,9 +57,18 @@ document.addEventListener("DOMContentLoaded", function () {    document.getElementById("chart-status-response-times-data").innerHTML  );  const ctx = canvas.getContext("2d");  const gradient = ctx.createLinearGradient(0, 0, 0, 300);  gradient.addColorStop(0, "rgba(107, 158, 120, 0.35)");  gradient.addColorStop(1, "rgba(107, 158, 120, 0)");  // Total stays the loud green line so old-eye muscle memory still works.  // The four phase lines are thinner and more muted so they read as  // breakdown, not as five equal-weight series.  const series = [    { key: "total", label: "Total",   color: accent.green,      width: 2,   tension: 0.25 },    { key: "dns",   label: "DNS",     color: accent.terracotta, width: 1.5, tension: 0.2  },    { key: "tcp",   label: "TCP",     color: accent.amber,      width: 1.5, tension: 0.2  },    { key: "tls",   label: "TLS",     color: accent.slate,      width: 1.5, tension: 0.2  },    { key: "ttfb",  label: "TTFB",    color: "#a09890",         width: 1.5, tension: 0.2  },  ];  const chart = new Chart(ctx, {    type: "line",    data: {
@@ -67,25 +76,27 @@ document.addEventListener("DOMContentLoaded", function () {        const date = new Date(d.label);        return `${date.getHours() % 12 || 12}:${date.getMinutes() < 10 ? "0" : ""}${date.getMinutes()} ${date.getHours() >= 12 ? "PM" : "AM"}`;      }),      datasets: [        {          label: "Response time (ms)",          data: data.map((d) => d.count),          backgroundColor: gradient,          borderColor: accent.green,          borderWidth: 2,          pointRadius: 0,          pointHoverRadius: 4,          pointHoverBackgroundColor: accent.greenBright,          tension: 0.25,          fill: true,        },      ],      datasets: series.map((s) => ({        label: s.label,        // Older rows (pre-migration-0002) have null phase timings — chart.js        // treats null as a gap in the line, which is exactly what we want.        data: data.map((d) => (d[s.key] == null ? null : d[s.key])),        borderColor: s.color,        backgroundColor: s.color,        borderWidth: s.width,        pointRadius: 0,        pointHoverRadius: 4,        pointHoverBackgroundColor: s.color,        tension: s.tension,        fill: false,        spanGaps: false,      })),    },    options: {      responsive: true,      maintainAspectRatio: false,      animation: { duration: 0 },      interaction: { mode: "index", intersect: false },      plugins: {        tooltip: {          mode: "index",
@@ -98,6 +109,10 @@ document.addEventListener("DOMContentLoaded", function () {          padding: 10,          titleFont: tickFont,          bodyFont: tickFont,          callbacks: {            label: (item) =>              ` ${item.dataset.label}: ${item.parsed.y == null ? "–" : item.parsed.y + " ms"}`,          },        },        legend: { position: "top", labels: legendLabel },      },
added migrations/0002_phase_timings.sql
@@ -0,0 +1,7 @@-- Per-phase timing breakdown for HTTP checks. Pre-existing rows leave-- these NULL; the dashboard chart skips nulls. response_ms remains the-- canonical total (alert email avg still reads it).ALTER TABLE checks ADD COLUMN dns_ms  INTEGER;ALTER TABLE checks ADD COLUMN tcp_ms  INTEGER;ALTER TABLE checks ADD COLUMN tls_ms  INTEGER;ALTER TABLE checks ADD COLUMN ttfb_ms INTEGER;
modified src/checker.rs
@@ -1,77 +1,285 @@use crate::alerts;use crate::db::now_ms;use crate::models::PropertyRow;use anyhow::{anyhow, Context};use chrono::Timelike;use hickory_resolver::TokioAsyncResolver;use rustls::pki_types::ServerName;use serde_json::json;use sqlx::SqlitePool;use std::collections::BTreeMap;use std::net::SocketAddr;use std::sync::Arc;use std::time::{Duration, Instant};use tokio::net::TcpStream;use tokio_rustls::TlsConnector;use url::Url;use uuid::Uuid;const USER_AGENT: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 \                         (KHTML, like Gecko) Chrome/102.0.5005.115 Safari/537.36 Status/2.0.0";const HTTP_TIMEOUT_SECS: u64 = 10;const MAX_REDIRECTS: usize = 5;/// Phase-by-phase timings for a single HTTP probe. `None` means the phase/// didn't run (the probe errored before reaching it). `total_ms` is/// wall-clock first-hop end-to-end and is also written to/// `checks.response_ms` for backward compat with the alert email avg./// Fields are `Option<i64>` so that pre-rewrite rows (migration 0002 left/// them NULL) keep deserializing.#[derive(Debug, Default, Clone, Copy)]pub struct PhaseTimings {    pub dns_ms: Option<i64>,    pub tcp_ms: Option<i64>,    pub tls_ms: Option<i64>,    pub ttfb_ms: Option<i64>,    pub total_ms: i64,}struct ProbeOutcome {    status_code: i64,    headers_json: String,    timings: PhaseTimings,}/// One-hop result with everything we need to decide whether to follow a/// redirect.struct HopResult {    status_code: i64,    headers: BTreeMap<String, String>,    raw_headers_json: String,    timings: PhaseTimings,}/// Build a fresh rustls config per probe. Keeps the "fresh client per/// probe = real handshake cost" invariant. 
ALPN-pinned to `h2` only:/// servers that don't speak HTTP/2 will fail the handshake (mapped to/// 526), which matches the project's "no HTTP/1.1" stance.fn tls_config_h2() -> Arc<rustls::ClientConfig> {    let mut roots = rustls::RootCertStore::empty();    roots.extend(webpki_roots::TLS_SERVER_ROOTS.iter().cloned());    let mut cfg = rustls::ClientConfig::builder()        .with_root_certificates(roots)        .with_no_client_auth();    cfg.alpn_protocols = vec![b"h2".to_vec()];    Arc::new(cfg)}fn looks_like_ssl_error(e: &anyhow::Error) -> bool {    let s = format!("{e:?}").to_lowercase();    s.contains("certificate")        || s.contains("invalidcertificate")        || s.contains("tls")        || s.contains("handshake")}/// Run a single HTTP check and persist the result. Maps SSL errors to 526/// (Cloudflare convention) and timeouts to 408 so the dashboard can show/// failure reasons without piping arbitrary error messages.pub async fn run_check(pool: &SqlitePool, prop: &PropertyRow) -> sqlx::Result<i64> {    let client = match reqwest::Client::builder()        .user_agent(USER_AGENT)        .timeout(Duration::from_secs(HTTP_TIMEOUT_SECS))        .redirect(reqwest::redirect::Policy::limited(5))        .build()    {        Ok(c) => c,    let outcome = match probe_with_redirects(&prop.url).await {        Ok(o) => o,        Err(e) => {            tracing::error!("reqwest client build: {e}");            return Ok(0);        }    };    let started = Instant::now();    let result = client.get(&prop.url).send().await;    let elapsed_ms = started.elapsed().as_millis().min(i64::MAX as u128) as i64;    let (status_code, response_ms, headers_json) = match result {        Ok(resp) => {            let status = resp.status().as_u16() as i64;            let mut hdrs = serde_json::Map::new();            for (k, v) in resp.headers().iter() {                if let Ok(s) = v.to_str() {                    hdrs.insert(k.as_str().to_string(), json!(s));                }            let 
code = if looks_like_ssl_error(&e) { 526 } else { 408 };            ProbeOutcome {                status_code: code,                headers_json: "{}".to_string(),                timings: PhaseTimings {                    total_ms: (HTTP_TIMEOUT_SECS as i64) * 1000,                    ..PhaseTimings::default()                },            }            (status, elapsed_ms, serde_json::Value::Object(hdrs).to_string())        }        Err(err) if err.is_timeout() => (408, 10_000, "{}".to_string()),        Err(err) => {            // reqwest with rustls reports cert errors via the underlying            // error chain; fall back to substring match for portability.            let chain = format!("{err:?}");            let lower = chain.to_lowercase();            let is_ssl = lower.contains("certificate")                || lower.contains("invalidcertificate")                || lower.contains("tls")                || lower.contains("handshake");            let code = if is_ssl { 526 } else { 408 };            (code, 10_000, "{}".to_string())        }    };    let id = prop.id.clone();    sqlx::query(        "INSERT INTO checks (property_id, status_code, response_ms, headers, created_at) \         VALUES (?, ?, ?, ?, ?)",        "INSERT INTO checks (property_id, status_code, response_ms, headers, dns_ms, tcp_ms, tls_ms, ttfb_ms, created_at) \         VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)",    )    .bind(&id)    .bind(status_code)    .bind(response_ms)    .bind(&headers_json)    .bind(outcome.status_code)    .bind(outcome.timings.total_ms)    .bind(&outcome.headers_json)    .bind(outcome.timings.dns_ms)    .bind(outcome.timings.tcp_ms)    .bind(outcome.timings.tls_ms)    .bind(outcome.timings.ttfb_ms)    .bind(now_ms())    .execute(pool)    .await?;    Ok(status_code)    Ok(outcome.status_code)}/// First-hop with full phase timings, then follow up to MAX_REDIRECTS/// 3xx hops to discover the final status code (so the alert state machine/// keeps working when a property uses an 
http→https or apex→www/// redirect). Phase timings always reflect the first hop only — that's/// the latency a fresh visitor pays before being redirected, and it's the/// only number that's meaningful when later hops live on different/// servers/domains.async fn probe_with_redirects(url_str: &str) -> anyhow::Result<ProbeOutcome> {    let url = Url::parse(url_str).context("invalid URL")?;    let outer = tokio::time::timeout(        Duration::from_secs(HTTP_TIMEOUT_SECS),        async {            let mut current = url.clone();            let first = phased_hop(&current).await?;            let first_timings = first.timings;            let mut status = first.status_code;            let mut headers_json = first.raw_headers_json;            let mut headers = first.headers;            let mut hops = 0usize;            while is_redirect(status) && hops < MAX_REDIRECTS {                let Some(loc) = headers.get("location").cloned() else { break };                let Ok(next) = current.join(&loc) else { break };                current = next;                let hop = match phased_hop(&current).await {                    Ok(h) => h,                    Err(_) => break,                };                status = hop.status_code;                headers_json = hop.raw_headers_json;                headers = hop.headers;                hops += 1;            }            Ok::<_, anyhow::Error>(ProbeOutcome {                status_code: status,                headers_json,                timings: first_timings,            })        },    )    .await;    match outer {        Ok(Ok(o)) => Ok(o),        Ok(Err(e)) => Err(e),        Err(_) => Err(anyhow!("timeout after {HTTP_TIMEOUT_SECS}s")),    }}fn is_redirect(code: i64) -> bool {    matches!(code, 301 | 302 | 303 | 307 | 308)}async fn phased_hop(url: &Url) -> anyhow::Result<HopResult> {    let host = url.host_str().context("URL missing host")?.to_string();    let port = url.port_or_known_default().context("URL missing port")?;    
let path_q = match url.query() {        Some(q) if !q.is_empty() => format!("{}?{}", url.path(), q),        _ => url.path().to_string(),    };    let path_q = if path_q.is_empty() { "/".to_string() } else { path_q };    let is_https = url.scheme() == "https";    let total_start = Instant::now();    let dns_start = Instant::now();    let resolver =        TokioAsyncResolver::tokio_from_system_conf().context("creating dns resolver")?;    let lookup = resolver.lookup_ip(host.as_str()).await.context("dns lookup")?;    let ip = lookup        .iter()        .next()        .ok_or_else(|| anyhow!("no addresses for {host}"))?;    let dns_ms = dns_start.elapsed().as_millis() as i64;    let addr = SocketAddr::new(ip, port);    let tcp_start = Instant::now();    let tcp = TcpStream::connect(addr).await.context("tcp connect")?;    tcp.set_nodelay(true).ok();    let tcp_ms = tcp_start.elapsed().as_millis() as i64;    if !is_https {        // h2 over plain TCP (h2c with prior knowledge) is rare in the        // wild, and the project is HTTP/2-only, so reject http:// URLs        // explicitly rather than silently downgrading.        
return Err(anyhow!("plain HTTP not supported; use https:// (HTTP/2 only)"));    }    let tls_start = Instant::now();    let server_name =        ServerName::try_from(host.clone()).context("invalid TLS server name")?;    let connector = TlsConnector::from(tls_config_h2());    let tls_stream = connector        .connect(server_name, tcp)        .await        .context("tls handshake")?;    let tls_ms = tls_start.elapsed().as_millis() as i64;    let (status, headers, raw_headers_json, ttfb_ms) =        h2_request(tls_stream, &host, &path_q).await?;    let total_ms = total_start.elapsed().as_millis() as i64;    Ok(HopResult {        status_code: status,        headers,        raw_headers_json,        timings: PhaseTimings {            dns_ms: Some(dns_ms),            tcp_ms: Some(tcp_ms),            tls_ms: Some(tls_ms),            ttfb_ms: Some(ttfb_ms),            total_ms,        },    })}/// Run an HTTP/2 GET over an established TLS stream and return/// (status_code, headers, headers_json, ttfb_ms). TTFB is measured from/// the start of the h2 client handshake (SETTINGS exchange) to the/// arrival of the response HEADERS frame, so it includes h2 protocol/// setup; the user-facing chart treats it as "everything between secure/// connection ready and first server byte", which matches curl's/// `time_starttransfer` minus `time_appconnect`.async fn h2_request(    tls_stream: tokio_rustls::client::TlsStream<TcpStream>,    host: &str,    path: &str,) -> anyhow::Result<(i64, BTreeMap<String, String>, String, i64)> {    let ttfb_start = Instant::now();    let (sr, connection) = h2::client::handshake(tls_stream)        .await        .context("h2 handshake")?;    // h2 needs someone to drive the connection's I/O loop. Spawn a task    // that lives just as long as this probe; we abort it on the way out.    
let conn_task = tokio::spawn(async move {        let _ = connection.await;    });    let mut sr = sr.ready().await.context("h2 send-request ready")?;    let req = http::Request::builder()        .method("GET")        .uri(format!("https://{host}{path}"))        .header("user-agent", USER_AGENT)        .header("accept", "*/*")        .body(())        .context("h2 request build")?;    let (rsp_fut, _send_stream) = sr.send_request(req, true).context("h2 send_request")?;    let rsp = rsp_fut.await.context("h2 response")?;    let ttfb_ms = ttfb_start.elapsed().as_millis() as i64;    let status = rsp.status().as_u16() as i64;    let mut headers = BTreeMap::new();    for (k, v) in rsp.headers().iter() {        if let Ok(s) = v.to_str() {            headers.insert(k.as_str().to_lowercase(), s.to_string());        }    }    let raw_headers_json = serde_json::Value::Object(        headers.iter().map(|(k, v)| (k.clone(), json!(v))).collect(),    )    .to_string();    drop(sr);    conn_task.abort();    Ok((status, headers, raw_headers_json, ttfb_ms))}/// Run a check, persist it, then advance the alert state machine and fire
@@ -100,12 +308,11 @@ async fn advance_alert_state(    let is_up = status_code == 200;    let mut tx = pool.begin().await?;    let row: Option<(String,)> = sqlx::query_as(        "SELECT alert_state FROM properties WHERE id = ?",    )    .bind(prop.id.clone())    .fetch_optional(&mut *tx)    .await?;    let row: Option<(String,)> =        sqlx::query_as("SELECT alert_state FROM properties WHERE id = ?")            .bind(prop.id.clone())            .fetch_optional(&mut *tx)            .await?;    let Some((current_state,)) = row else {        return Ok(());    };
modified src/main.rs
@@ -28,6 +28,14 @@ async fn main() -> anyhow::Result<()> {        )        .init();    // rustls 0.23 only picks a process-wide CryptoProvider on its own when    // exactly one of the `ring` / `aws-lc-rs` features is enabled at the    // rustls crate level; with both (e.g. via feature unification) or    // neither, building a ClientConfig panics. The phased prober    // (src/checker.rs) builds rustls ClientConfigs directly, so install    // ring explicitly once at startup rather than rely on that. The result    // is ignored because install_default() only errs when a provider is    // already installed. reqwest/lettre's TLS paths are unaffected because    // they ship their own provider via hyper-rustls.    let _ = rustls::crypto::ring::default_provider().install_default();    // Subcommand dispatch. Anything besides a known subcommand falls through to the server.    let mut argv = std::env::args().skip(1);    if let Some(first) = argv.next() {
modified src/models.rs
@@ -201,6 +201,17 @@ pub struct CheckRow {    pub response_ms: i64,    pub headers: String,    pub created_at: i64,    // Phase-by-phase timings (added in migration 0002). NULL for rows    // written before the rewrite to a phased prober and for probes that    // failed (the checker then records only the total). Successful probes    // are HTTPS-only and set all four of dns_ms/tcp_ms/tls_ms/ttfb_ms.    #[serde(default)]    pub dns_ms: Option<i64>,    #[serde(default)]    pub tcp_ms: Option<i64>,    #[serde(default)]    pub tls_ms: Option<i64>,    #[serde(default)]    pub ttfb_ms: Option<i64>,}pub async fn recent_checks(
modified src/routes/dashboard.rs
@@ -80,7 +80,11 @@ pub async fn property(                "label": chrono::DateTime::<chrono::Utc>::from_timestamp_millis(c.created_at)                    .map(|d| d.to_rfc3339())                    .unwrap_or_default(),                "count": c.response_ms,                "total": c.response_ms,                "dns":   c.dns_ms,                "tcp":   c.tcp_ms,                "tls":   c.tls_ms,                "ttfb":  c.ttfb_ms,            })        })        .collect();