//! Run the lighthouse npm CLI and parse its JSON report into scores/details.
use serde_json::Value;
use std::path::PathBuf;
use std::process::Stdio;
use std::time::Duration;
use thiserror::Error;
use tokio::process::Command;

/// Locate a chromium binary for lighthouse to drive. Tries `CHROMIUM_BIN`,
/// then a PATH search for the common binary names, then a glob over
/// `/opt/playwright-browsers/` so the webdev container Just Works without
/// per-shell env vars. Lighthouse passes the resulting path via the
/// `CHROME_PATH` env var (the npm CLI looks for that).
/// Locate a chromium binary for lighthouse to drive.
///
/// Resolution order: the `CHROMIUM_BIN` env var, a PATH scan over common
/// binary names, then a sweep of `/opt/playwright-browsers/` so the webdev
/// container Just Works without per-shell env vars. The caller exports the
/// result as `CHROME_PATH`, which the lighthouse npm CLI honours.
fn find_chromium() -> Option<String> {
    // Explicit override wins, but only if it actually points at a file.
    if let Ok(explicit) = std::env::var("CHROMIUM_BIN") {
        if PathBuf::from(&explicit).is_file() {
            return Some(explicit);
        }
    }

    const NAMES: [&str; 5] = [
        "chromium",
        "chromium-browser",
        "google-chrome",
        "chrome",
        "chrome-headless-shell",
    ];
    // Same order as a manual nested loop: directories outermost, names inner.
    if let Some(path_var) = std::env::var_os("PATH") {
        let found = std::env::split_paths(&path_var)
            .flat_map(|dir| NAMES.iter().map(move |name| dir.join(name)))
            .find(|candidate| candidate.is_file());
        if let Some(found) = found {
            return Some(found.to_string_lossy().into_owned());
        }
    }

    // Playwright's browser cache. Lighthouse needs a full chrome (it drives
    // DevTools), not the headless-shell: prefer chrome-linux64/chrome, fall
    // back to the chromium build under chromium-*/chrome-linux/chrome, and
    // only then the headless shell.
    if let Ok(entries) = std::fs::read_dir("/opt/playwright-browsers") {
        for entry in entries.flatten() {
            let base = entry.path();
            let found = [
                "chrome-linux64/chrome",
                "chrome-linux/chrome",
                "chrome-headless-shell-linux64/chrome-headless-shell",
            ]
            .iter()
            .map(|rel| base.join(rel))
            .find(|candidate| candidate.is_file());
            if let Some(found) = found {
                return Some(found.to_string_lossy().into_owned());
            }
        }
    }
    None
}

/// Wall-clock budget for one lighthouse run; past this `fetch` reports a timeout.
const SUBPROCESS_TIMEOUT_SECS: u64 = 180;
/// Flags forwarded to chrome via lighthouse's `--chrome-flags`: headless and
/// container-safe (no sandbox, no /dev/shm reliance, no GPU).
const CHROME_FLAGS: &str = "--headless --no-sandbox --disable-dev-shm-usage --disable-gpu";

/// Everything that can go wrong while running lighthouse and interpreting
/// its JSON report.
#[derive(Debug, Error)]
pub enum LighthouseError {
    /// `node_modules/.bin/lighthouse` was not found under the project root.
    #[error("lighthouse binary missing at {0:?}")]
    BinaryMissing(PathBuf),
    /// The subprocess exceeded the allowed wall-clock budget (seconds).
    #[error("lighthouse timed out after {0}s")]
    Timeout(u64),
    /// Non-zero exit; `stderr` carries a truncated tail of the CLI's stderr.
    #[error("lighthouse exited {code}: {stderr}")]
    ExitNonZero { code: i32, stderr: String },
    /// The CLI's stdout was not valid JSON.
    #[error("could not parse lighthouse output: {0}")]
    Parse(#[from] serde_json::Error),
    /// The report JSON lacked an expected `categories` entry.
    #[error("missing category in lighthouse output: {0}")]
    MissingCategory(&'static str),
    /// One or more categories came back with a `null` score.
    #[error("null score(s) returned by lighthouse: {0:?}")]
    NullScores(Vec<&'static str>),
    /// Failed to spawn or communicate with the subprocess.
    #[error("subprocess io: {0}")]
    Io(#[from] std::io::Error),
}

/// Run the lighthouse npm CLI and return the parsed JSON report.
/// Run the lighthouse npm CLI against `url` and return the parsed JSON report.
///
/// `root` is the project directory containing `node_modules/.bin/lighthouse`.
///
/// # Errors
/// - [`LighthouseError::BinaryMissing`] when the CLI shim is not installed.
/// - [`LighthouseError::Timeout`] when the run exceeds [`SUBPROCESS_TIMEOUT_SECS`].
/// - [`LighthouseError::ExitNonZero`] on a non-zero exit, with the last 500
///   chars of stderr.
/// - [`LighthouseError::Parse`] when stdout is not valid JSON.
/// - [`LighthouseError::Io`] when spawning/collecting the subprocess fails.
pub async fn fetch(root: &std::path::Path, url: &str) -> Result<Value, LighthouseError> {
    let bin = root.join("node_modules/.bin/lighthouse");
    if !bin.exists() {
        return Err(LighthouseError::BinaryMissing(bin));
    }

    let chromium = find_chromium();

    // `bun run --bun` symlinks `node` → bun, so the lighthouse shim's
    // `#!/usr/bin/env node` shebang resolves to bun's runtime. Lets us drop
    // nodejs/npm from the image entirely.
    let mut cmd = Command::new("bun");
    cmd.arg("run")
        .arg("--bun")
        .arg(&bin)
        .arg(url)
        .arg(format!("--chrome-flags={CHROME_FLAGS}"))
        .arg("--output=json")
        .arg("--output-path=stdout")
        .arg("--quiet")
        .env_clear()
        .env("PATH", "/usr/bin:/bin:/usr/local/bin")
        // If the timeout below fires we drop the `wait_with_output` future.
        // Without kill_on_drop that would orphan the lighthouse/chrome
        // process tree and leave it running forever.
        .kill_on_drop(true)
        .stdout(Stdio::piped())
        .stderr(Stdio::piped());
    if let Some(c) = chromium {
        cmd.env("CHROME_PATH", &c);
    }

    let child = cmd.spawn()?;
    let output = match tokio::time::timeout(
        Duration::from_secs(SUBPROCESS_TIMEOUT_SECS),
        child.wait_with_output(),
    )
    .await
    {
        Ok(r) => r?,
        Err(_) => return Err(LighthouseError::Timeout(SUBPROCESS_TIMEOUT_SECS)),
    };

    if !output.status.success() {
        let stderr = String::from_utf8_lossy(&output.stderr).trim().to_string();
        // Keep only the last 500 chars — the useful lighthouse error is at
        // the tail and full dumps can be huge. `char_indices().rev().nth(499)`
        // finds the byte offset of the 500th-from-last char (char-safe slice);
        // shorter strings fall through to offset 0 and are kept whole.
        let tail_start = stderr
            .char_indices()
            .rev()
            .nth(499)
            .map(|(i, _)| i)
            .unwrap_or(0);
        return Err(LighthouseError::ExitNonZero {
            code: output.status.code().unwrap_or(-1),
            stderr: stderr[tail_start..].to_string(),
        });
    }

    Ok(serde_json::from_slice(&output.stdout)?)
}

/// The four top-level lighthouse category scores as integer percentages
/// (0–100). The serde renames give the human-facing labels used when this
/// struct is serialized.
#[derive(Debug, serde::Serialize)]
pub struct Scores {
    #[serde(rename = "Performance")]
    pub performance: i64,
    #[serde(rename = "Accessibility")]
    pub accessibility: i64,
    #[serde(rename = "Best practices")]
    pub best_practices: i64,
    #[serde(rename = "SEO")]
    pub seo: i64,
}

pub fn parse_scores(results: &Value) -> Result<Scores, LighthouseError> {
    let cats = results
        .get("categories")
        .ok_or(LighthouseError::MissingCategory("categories"))?;
    let pull = |k: &'static str| -> Result<Option<f64>, LighthouseError> {
        let cat = cats.get(k).ok_or(LighthouseError::MissingCategory(k))?;
        Ok(cat.get("score").and_then(|v| v.as_f64()))
    };
    let p = pull("performance")?;
    let a = pull("accessibility")?;
    let b = pull("best-practices")?;
    let s = pull("seo")?;
    let mut nulls = Vec::new();
    if p.is_none() { nulls.push("Performance"); }
    if a.is_none() { nulls.push("Accessibility"); }
    if b.is_none() { nulls.push("Best practices"); }
    if s.is_none() { nulls.push("SEO"); }
    if !nulls.is_empty() {
        return Err(LighthouseError::NullScores(nulls));
    }
    let to_pct = |v: f64| (v * 100.0).round() as i64;
    Ok(Scores {
        performance: to_pct(p.unwrap()),
        accessibility: to_pct(a.unwrap()),
        best_practices: to_pct(b.unwrap()),
        seo: to_pct(s.unwrap()),
    })
}

/// Performance-category breakdown extracted from a lighthouse report:
/// raw JSON objects for the weighted metric audits and the top improvement
/// opportunities. Built by [`parse_details`].
#[derive(Debug, serde::Serialize)]
pub struct Details {
    pub metrics: Vec<Value>,
    pub opportunities: Vec<Value>,
}

/// Extract the performance-category breakdown from a full lighthouse report.
///
/// Returns `None` when the report lacks `categories.performance` or an
/// `audits` object. Otherwise produces:
/// - `metrics`: the weighted metric audits (those with `group == "metrics"`
///   and weight > 0), sorted by descending weight;
/// - `opportunities`: audits scoring below 0.9 that carry at least one
///   actionable savings signal, sorted by descending `savings_ms` and capped
///   at 10 entries.
pub fn parse_details(results: &Value) -> Option<Details> {
    let category = results.get("categories")?.get("performance")?;
    let audits = results.get("audits")?.as_object()?;

    let mut metrics: Vec<Value> = Vec::new();
    let mut opportunities: Vec<Value> = Vec::new();

    // Each auditRef ties a category entry (group/weight) to the full audit
    // object under `audits`; refs whose id has no audit entry are skipped.
    if let Some(refs) = category.get("auditRefs").and_then(|v| v.as_array()) {
        for r in refs {
            let id = r.get("id").and_then(|v| v.as_str()).unwrap_or_default();
            let Some(audit) = audits.get(id) else { continue };
            let group = r.get("group").and_then(|v| v.as_str()).unwrap_or("");
            let weight = r.get("weight").and_then(|v| v.as_f64()).unwrap_or(0.0);
            // Score is 0.0–1.0 or null (null → None here).
            let score = audit.get("score").and_then(|v| v.as_f64());

            // Weighted metric audits are reported unconditionally, even when
            // they pass — the weight > 0 check drops unscored metrics.
            if group == "metrics" && weight > 0.0 {
                metrics.push(serde_json::json!({
                    "id": id,
                    "acronym": r.get("acronym").and_then(|v| v.as_str()).unwrap_or(id),
                    "title": audit.get("title"),
                    "display_value": audit.get("displayValue"),
                    "score": score,
                    "weight": weight,
                }));
                continue;
            }

            // Opportunities/diagnostics: skip passing/manual/not-applicable.
            // `group: "hidden"` covers TTI and other audits Lighthouse keeps
            // around but no longer scores; they shouldn't masquerade as wins.
            if group == "hidden" {
                continue;
            }
            let mode = audit
                .get("scoreDisplayMode")
                .and_then(|v| v.as_str())
                .unwrap_or("");
            if matches!(mode, "manual" | "notApplicable" | "informative") {
                continue;
            }
            // Null score means the audit wasn't scored; >= 0.9 counts as
            // passing — neither is worth surfacing as an opportunity.
            let Some(s) = score else { continue };
            if s >= 0.9 {
                continue;
            }
            let savings_ms = audit
                .get("details")
                .and_then(|d| d.get("overallSavingsMs"))
                .and_then(|v| v.as_f64())
                .unwrap_or(0.0);
            let savings_bytes = audit
                .get("details")
                .and_then(|d| d.get("overallSavingsBytes"))
                .and_then(|v| v.as_f64())
                .unwrap_or(0.0);
            // True when any per-metric savings estimate is strictly positive.
            let has_metric_savings = audit
                .get("metricSavings")
                .and_then(|v| v.as_object())
                .map(|m| {
                    m.values()
                        .any(|v| v.as_f64().map(|n| n > 0.0).unwrap_or(false))
                })
                .unwrap_or(false);
            // Require at least one actionable signal; pure diagnostics
            // (forced-reflow, network-dependency-tree, etc.) carry none and
            // would otherwise show up with a meaningless 0 score.
            if savings_ms == 0.0 && savings_bytes == 0.0 && !has_metric_savings {
                continue;
            }
            // savings_bytes gates inclusion above but only savings_ms is
            // emitted/sorted on; byte-only wins surface with savings_ms 0.
            opportunities.push(serde_json::json!({
                "id": id,
                "title": audit.get("title"),
                "display_value": audit.get("displayValue"),
                "savings_ms": savings_ms,
            }));
        }
    }

    // Heaviest metrics first; partial_cmp is safe here since weights come
    // from as_f64 with a 0.0 fallback (ties/NaN collapse to Equal).
    metrics.sort_by(|a, b| {
        let aw = a.get("weight").and_then(|v| v.as_f64()).unwrap_or(0.0);
        let bw = b.get("weight").and_then(|v| v.as_f64()).unwrap_or(0.0);
        bw.partial_cmp(&aw).unwrap_or(std::cmp::Ordering::Equal)
    });
    // Biggest estimated time savings first, then keep only the top 10.
    opportunities.sort_by(|a, b| {
        let asav = a.get("savings_ms").and_then(|v| v.as_f64()).unwrap_or(0.0);
        let bsav = b.get("savings_ms").and_then(|v| v.as_f64()).unwrap_or(0.0);
        bsav.partial_cmp(&asav).unwrap_or(std::cmp::Ordering::Equal)
    });
    opportunities.truncate(10);

    Some(Details { metrics, opportunities })
}