heartwood every commit a ring

Add live crawl/lighthouse status panel with real-time recrawl

712ff83e by Isaac Bythewood · 25 days ago

Add live crawl/lighthouse status panel with real-time recrawl

Surfaces run state, last-success/attempt times, duration, page counts,
insight severity counts, and errors for both the crawler and Lighthouse.
Recrawl is now a POST that polls a JSON endpoint and animates progress.

- Property: crawl_state/lighthouse_state + success/error/duration fields
- Scheduler: flips queued/running/idle, boot reset + watchdog for wedged
  states, skips already-in-flight properties
- Crawler runner: progress_cb hook so pages-crawled updates live
- Views: JSON status endpoint, recrawl/rerun-lighthouse POST endpoints
- Template + new property_crawl_status.js: monitoring panel, adaptive
  polling (2s busy / 30s idle), auto-reload on run completion
modified crawler/runner.py
@@ -43,8 +43,12 @@ def _normalize_url(url):    return cleaned.rstrip("/") or cleaneddef crawl(start_url):    """Fetch up to PAGE_CAP pages from the same host and collect metadata."""def crawl(start_url, progress_cb=None):    """Fetch up to PAGE_CAP pages from the same host and collect metadata.    `progress_cb(pages_count)` is invoked after each batch so callers can    surface live progress without blocking the crawl.    """    session = make_session()    parsed = urlparse(start_url)
@@ -123,6 +127,12 @@ def crawl(start_url):                seen.add(_normalize_url(r.url))                pages.append(page)            if progress_cb is not None:                try:                    progress_cb(len(pages))                except Exception:                    logger.exception("[crawler] progress callback failed")    if time.time() >= deadline:        logger.warning(            "[crawler] hit deadline for %s after %d pages",
@@ -207,11 +217,11 @@ def _write_debug_output(crawl_result):        logger.exception("[crawler] failed writing debug output to %s", path)def run_seo_spider(url):def run_seo_spider(url, progress_cb=None):    """Crawl `url`, write debug output, return list of insight dicts."""    start = time.time()    logger.info("[crawler] starting %s", url)    result = crawl(url)    result = crawl(url, progress_cb=progress_cb)    insights = run_checks(result)    _write_debug_output(result)    logger.info(
modified properties/management/commands/scheduler.py
@@ -23,6 +23,34 @@ class Command(BaseCommand):        Check.objects.filter(created_at__lt=timezone.now() - timezone.timedelta(days=3)).delete()        self.stdout.write("[Scheduler] Cleaned checks older than 3 days.")    def reset_wedged_states(self):        """Flip stale running/queued states back to idle.        Runs on startup (catches states left over from a crashed scheduler)        and each cycle (catches threads that overran JOIN_TIMEOUT).        """        now = timezone.now()        crawl_cutoff = now - timezone.timedelta(seconds=900)        lh_cutoff = now - timezone.timedelta(seconds=300)        Property.objects.filter(            crawl_state__in=["queued", "running"],        ).filter(            Q(crawl_started_at__isnull=True) | Q(crawl_started_at__lt=crawl_cutoff)        ).update(            crawl_state="idle",            last_crawl_error="Crawl timed out or was interrupted",        )        Property.objects.filter(            lighthouse_state__in=["queued", "running"],        ).filter(            Q(lighthouse_started_at__isnull=True) | Q(lighthouse_started_at__lt=lh_cutoff)        ).update(            lighthouse_state="idle",            last_lighthouse_error="Lighthouse run timed out or was interrupted",        )    def thread_target(self, property_id):        property = Property.objects.get(id=property_id)        self.stdout.write("[Scheduler] Checking status {}".format(property.url))
@@ -115,12 +143,17 @@ class Command(BaseCommand):            Q(last_lighthouse_run_at__isnull=True)            | Q(next_lighthouse_run_at__isnull=True)            | Q(next_lighthouse_run_at__lte=now)        )        ).exclude(lighthouse_state__in=["queued", "running"])        properties = list(due)        for p in properties:            p.next_lighthouse_run_at = p.get_next_run_at_lighthouse()            p.last_lighthouse_run_at = timezone.now()            p.save(update_fields=["next_lighthouse_run_at", "last_lighthouse_run_at"])            p.lighthouse_state = "queued"            p.save(update_fields=[                "next_lighthouse_run_at",                "last_lighthouse_run_at",                "lighthouse_state",            ])        properties = [p.id for p in properties]        db.connections.close_all()
@@ -133,12 +166,17 @@ class Command(BaseCommand):            Q(last_run_at_crawler__isnull=True)            | Q(next_run_at_crawler__isnull=True)            | Q(next_run_at_crawler__lte=now)        )        ).exclude(crawl_state__in=["queued", "running"])        properties = list(due)        for p in properties:            p.next_run_at_crawler = p.get_next_run_at_crawl()            p.last_run_at_crawler = timezone.now()            p.save(update_fields=["next_run_at_crawler", "last_run_at_crawler"])            p.crawl_state = "queued"            p.save(update_fields=[                "next_run_at_crawler",                "last_run_at_crawler",                "crawl_state",            ])        properties = [p.id for p in properties]        db.connections.close_all()
@@ -148,6 +186,15 @@ class Command(BaseCommand):    def handle(self, *args, **options):        self.stdout.write("[Scheduler] Starting scheduler...")        # Clear any running/queued states left over from a prior crash so        # that rows don't sit stuck and block new runs.        Property.objects.filter(crawl_state__in=["queued", "running"]).update(            crawl_state="idle"        )        Property.objects.filter(lighthouse_state__in=["queued", "running"]).update(            lighthouse_state="idle"        )        # Start queue_process thread        t = threading.Thread(target=self.queue_process)        t.daemon = True
@@ -163,6 +210,7 @@ class Command(BaseCommand):            self.queue_check_status()            self.queue_check_lighthouse()            self.queue_check_crawler()            self.reset_wedged_states()            self.clean_checks()            self.stdout.write("[Scheduler] Sleeping scheduler for 30 seconds...")
added properties/migrations/0010_crawl_lighthouse_state_tracking.py
# Generated by Django 6.0.4 on 2026-04-16 23:39
#
# Adds the crawl/lighthouse run-state tracking columns used by the live
# monitoring panel: a small per-subsystem state machine (idle/queued/running)
# plus started-at, duration, error, success-time, and page-count bookkeeping.
# NOTE(review): no columns are added here for last_lighthouse_success_at /
# last_lighthouse_error / next_lighthouse_run_at — presumably those already
# exist from an earlier migration (the pre-change template reads
# last_lighthouse_error); verify against migration history.

from django.db import migrations, models


class Migration(migrations.Migration):

    dependencies = [
        ("properties", "0009_check_properties__propert_b3e3ac_idx"),
    ]

    operations = [
        # --- Crawler run tracking -------------------------------------
        migrations.AddField(
            model_name="property",
            name="crawl_started_at",
            field=models.DateTimeField(blank=True, null=True),
        ),
        migrations.AddField(
            model_name="property",
            name="crawl_state",
            field=models.CharField(
                choices=[
                    ("idle", "Idle"),
                    ("queued", "Queued"),
                    ("running", "Running"),
                ],
                default="idle",
                max_length=10,
            ),
        ),
        migrations.AddField(
            model_name="property",
            name="last_crawl_duration_ms",
            field=models.IntegerField(blank=True, null=True),
        ),
        migrations.AddField(
            model_name="property",
            name="last_crawl_error",
            field=models.TextField(blank=True, null=True),
        ),
        migrations.AddField(
            model_name="property",
            name="last_crawl_pages_count",
            field=models.IntegerField(blank=True, null=True),
        ),
        migrations.AddField(
            model_name="property",
            name="last_crawl_success_at",
            field=models.DateTimeField(blank=True, null=True),
        ),
        # --- Lighthouse run tracking ----------------------------------
        migrations.AddField(
            model_name="property",
            name="last_lighthouse_duration_ms",
            field=models.IntegerField(blank=True, null=True),
        ),
        migrations.AddField(
            model_name="property",
            name="lighthouse_started_at",
            field=models.DateTimeField(blank=True, null=True),
        ),
        migrations.AddField(
            model_name="property",
            name="lighthouse_state",
            field=models.CharField(
                choices=[
                    ("idle", "Idle"),
                    ("queued", "Queued"),
                    ("running", "Running"),
                ],
                default="idle",
                max_length=10,
            ),
        ),
    ]
modified properties/models.py
@@ -1,4 +1,5 @@import reimport timeimport uuidimport logging
@@ -217,10 +218,34 @@ class CrawlerMixin:            return True        return self.next_run_at_crawler <= now    def _report_crawl_progress(self, pages_count):        Property.objects.filter(pk=self.pk).update(last_crawl_pages_count=pages_count)    def crawl_site(self):        insights = run_seo_spider(self.url)        self.crawler_insights = insights        self.save(update_fields=["crawler_insights"])        Property.objects.filter(pk=self.pk).update(            crawl_state="running",            crawl_started_at=timezone.now(),            last_crawl_pages_count=0,        )        start = time.monotonic()        try:            insights = run_seo_spider(self.url, progress_cb=self._report_crawl_progress)        except Exception as e:            logger.exception("Crawl failed for %s", self.url)            Property.objects.filter(pk=self.pk).update(                crawl_state="idle",                last_crawl_error=f"{type(e).__name__}: {e}",                last_crawl_duration_ms=int((time.monotonic() - start) * 1000),            )            return        duration_ms = int((time.monotonic() - start) * 1000)        Property.objects.filter(pk=self.pk).update(            crawler_insights=insights,            crawl_state="idle",            last_crawl_success_at=timezone.now(),            last_crawl_error=None,            last_crawl_duration_ms=duration_ms,        )class Property(CrawlerMixin, AlertsMixin, SecurityMixin, models.Model):
@@ -237,12 +262,29 @@ class Property(CrawlerMixin, AlertsMixin, SecurityMixin, models.Model):    last_run_at_crawler = models.DateTimeField(blank=True, null=True)    next_run_at_crawler = models.DateTimeField(blank=True, null=True)    crawler_insights = models.JSONField(blank=True, null=True)    crawl_state = models.CharField(        max_length=10,        choices=[("idle", "Idle"), ("queued", "Queued"), ("running", "Running")],        default="idle",    )    crawl_started_at = models.DateTimeField(blank=True, null=True)    last_crawl_success_at = models.DateTimeField(blank=True, null=True)    last_crawl_error = models.TextField(blank=True, null=True)    last_crawl_duration_ms = models.IntegerField(blank=True, null=True)    last_crawl_pages_count = models.IntegerField(blank=True, null=True)    lighthouse_scores = models.JSONField(blank=True, null=True)    last_lighthouse_run_at = models.DateTimeField(blank=True, null=True)    last_lighthouse_success_at = models.DateTimeField(blank=True, null=True)    last_lighthouse_error = models.TextField(blank=True, null=True)    last_lighthouse_duration_ms = models.IntegerField(blank=True, null=True)    next_lighthouse_run_at = models.DateTimeField(blank=True, null=True)    lighthouse_state = models.CharField(        max_length=10,        choices=[("idle", "Idle"), ("queued", "Queued"), ("running", "Running")],        default="idle",    )    lighthouse_started_at = models.DateTimeField(blank=True, null=True)    # Alert state tracking    last_alert_sent = models.DateTimeField(blank=True, null=True)
@@ -332,29 +374,37 @@ class Property(CrawlerMixin, AlertsMixin, SecurityMixin, models.Model):        self.run_check_lighthouse()    def run_check_lighthouse(self):        Property.objects.filter(pk=self.pk).update(            lighthouse_state="running",            lighthouse_started_at=timezone.now(),        )        start = time.monotonic()        try:            results = fetch_lighthouse_results(self.url)            scores = parse_lighthouse_results(results)        except LighthouseError as e:            logger.warning("Lighthouse failed for %s: %s", self.url, e)            self.last_lighthouse_error = str(e)            self.save(update_fields=["last_lighthouse_error"])            Property.objects.filter(pk=self.pk).update(                lighthouse_state="idle",                last_lighthouse_error=str(e),                last_lighthouse_duration_ms=int((time.monotonic() - start) * 1000),            )            return        except Exception as e:            logger.exception("Unexpected lighthouse error for %s", self.url)            self.last_lighthouse_error = f"{type(e).__name__}: {e}"            self.save(update_fields=["last_lighthouse_error"])            Property.objects.filter(pk=self.pk).update(                lighthouse_state="idle",                last_lighthouse_error=f"{type(e).__name__}: {e}",                last_lighthouse_duration_ms=int((time.monotonic() - start) * 1000),            )            return        self.lighthouse_scores = scores        self.last_lighthouse_success_at = timezone.now()        self.last_lighthouse_error = None        self.save(            update_fields=[                "lighthouse_scores",                "last_lighthouse_success_at",                "last_lighthouse_error",            ]        Property.objects.filter(pk=self.pk).update(            lighthouse_scores=scores,            last_lighthouse_success_at=timezone.now(),            last_lighthouse_error=None,            
last_lighthouse_duration_ms=int((time.monotonic() - start) * 1000),            lighthouse_state="idle",        )    @property
modified properties/static_src/index.js
@@ -1,4 +1,5 @@import "./scripts/property_graphs.js";import "./scripts/property_is_public.js";import "./scripts/property_crawl_status.js";import "./styles/print.scss";
added properties/static_src/scripts/property_crawl_status.js
// Polls the crawl/lighthouse status endpoint and updates the monitoring
// panel in-place. When a crawl or lighthouse run is active, polling is
// fast (2s); when idle, it's slow (30s) so older tabs don't hammer the
// server.

const FAST_POLL_MS = 2000;
const SLOW_POLL_MS = 30000;

// NOTE(review): this helper is never called below — candidate for removal.
function $(root, selector) {
  return root.querySelector(selector);
}

// Set the text of the element tagged `data-field="<field>"`, if present.
function setText(root, field, value) {
  const el = root.querySelector(`[data-field="${field}"]`);
  if (el) el.textContent = value;
}

// Toggle visibility of a data-field element via Bootstrap's `d-none`.
function show(root, field, visible) {
  const el = root.querySelector(`[data-field="${field}"]`);
  if (!el) return;
  el.classList.toggle("d-none", !visible);
}

// Format a millisecond duration for display: "— " for unknown, then
// "### ms" / "#.# s" / "#m #s" depending on magnitude.
function humanDuration(ms) {
  if (ms == null) return "—";
  if (ms < 1000) return `${ms} ms`;
  const s = ms / 1000;
  if (s < 60) return `${s.toFixed(1)} s`;
  const m = Math.floor(s / 60);
  const rs = Math.round(s - m * 60);
  return `${m}m ${rs}s`;
}

// Render an ISO timestamp relative to `now` (epoch ms): "5m ago", "in 2h".
// Returns null for a missing timestamp.
function relativeTime(iso, now) {
  if (!iso) return null;
  const then = new Date(iso).getTime();
  const diff = now - then;
  const future = diff < 0;
  const abs = Math.abs(diff);
  const s = Math.round(abs / 1000);
  let text;
  if (s < 45) text = `${s}s`;
  else if (s < 3600) text = `${Math.round(s / 60)}m`;
  else if (s < 86400) text = `${Math.round(s / 3600)}h`;
  else text = `${Math.round(s / 86400)}d`;
  return future ? `in ${text}` : `${text} ago`;
}

// Locale-formatted absolute timestamp, used for hover tooltips.
function formatAbsolute(iso) {
  if (!iso) return "";
  const d = new Date(iso);
  return d.toLocaleString();
}

// Map a state string to a badge label + Bootstrap color class.
function stateBadge(state) {
  switch (state) {
    case "running":
      return { label: "Running", cls: "bg-primary" };
    case "queued":
      return { label: "Queued", cls: "bg-info text-dark" };
    default:
      return { label: "Idle", cls: "bg-secondary" };
  }
}

// Write a relative time into a data-field element, with the absolute
// time as its tooltip; "—" and no tooltip when the timestamp is missing.
function renderWhen(root, field, iso, now) {
  const el = root.querySelector(`[data-field="${field}"]`);
  if (!el) return;
  if (!iso) {
    el.textContent = "—";
    el.removeAttribute("title");
    return;
  }
  const rel = relativeTime(iso, now);
  el.textContent = rel;
  el.title = formatAbsolute(iso);
}

// Fill in the crawler card: state badge, progress bar (only while
// running), last error, timestamps, page count, duration, insight
// severity badges, and next-run display.
function renderCrawler(root, crawler, serverNow) {
  const badge = root.querySelector('[data-field="crawler.state_badge"]');
  const s = stateBadge(crawler.state);
  badge.className = `badge ${s.cls}`;
  badge.textContent = s.label;
  show(root, "crawler.progress_wrap", crawler.state === "running");
  if (crawler.state === "running") {
    const bar = root.querySelector('[data-field="crawler.progress_bar"]');
    // `progress` is a 0-1 fraction from the server; default 0 when absent.
    const pct = Math.round((crawler.progress || 0) * 100);
    bar.style.width = `${pct}%`;
    bar.setAttribute("aria-valuenow", pct);
  }
  show(root, "crawler.error_box", !!crawler.last_error);
  if (crawler.last_error) {
    setText(root, "crawler.error_text", crawler.last_error);
  }
  renderWhen(root, "crawler.last_success", crawler.last_success_at, serverNow);
  renderWhen(root, "crawler.last_attempt", crawler.last_attempt_at, serverNow);
  const pagesEl = root.querySelector('[data-field="crawler.pages"]');
  if (crawler.state === "running") {
    pagesEl.textContent = `${crawler.pages_count || 0} so far…`;
  } else if (crawler.pages_count != null) {
    pagesEl.textContent = `${crawler.pages_count}`;
  } else {
    pagesEl.textContent = "—";
  }
  setText(root, "crawler.duration", humanDuration(crawler.last_duration_ms));
  const ins = crawler.insights_by_severity || { error: 0, warning: 0, info: 0 };
  const insEl = root.querySelector('[data-field="crawler.insights"]');
  insEl.innerHTML = `
    <span class="badge bg-danger me-1">${ins.error} err</span>
    <span class="badge bg-warning text-dark me-1">${ins.warning} warn</span>
    <span class="badge bg-info text-dark">${ins.info} info</span>
  `;
  const nextEl = root.querySelector('[data-field="crawler.next_run"]');
  if (!crawler.next_run_at) {
    nextEl.textContent = "—";
    nextEl.removeAttribute("title");
  } else if (crawler.state === "running" || crawler.state === "queued") {
    nextEl.textContent = "— (running now)";
    nextEl.title = formatAbsolute(crawler.next_run_at);
  } else if (crawler.is_overdue) {
    nextEl.innerHTML = `<span class="text-warning">due now</span>`;
    nextEl.title = formatAbsolute(crawler.next_run_at);
  } else {
    nextEl.textContent = relativeTime(crawler.next_run_at, serverNow);
    nextEl.title = formatAbsolute(crawler.next_run_at);
  }
}

// Fill in the Lighthouse card: state badge, last error, timestamps,
// duration, and next-run display (no progress bar — Lighthouse runs
// don't report incremental progress).
function renderLighthouse(root, lh, serverNow) {
  const badge = root.querySelector('[data-field="lighthouse.state_badge"]');
  const s = stateBadge(lh.state);
  badge.className = `badge ${s.cls}`;
  badge.textContent = s.label;
  show(root, "lighthouse.error_box", !!lh.last_error);
  if (lh.last_error) {
    setText(root, "lighthouse.error_text", lh.last_error);
  }
  renderWhen(root, "lighthouse.last_success", lh.last_success_at, serverNow);
  renderWhen(root, "lighthouse.last_attempt", lh.last_attempt_at, serverNow);
  setText(root, "lighthouse.duration", humanDuration(lh.last_duration_ms));
  const nextEl = root.querySelector('[data-field="lighthouse.next_run"]');
  if (!lh.next_run_at) {
    nextEl.textContent = "—";
    nextEl.removeAttribute("title");
  } else if (lh.state === "running" || lh.state === "queued") {
    nextEl.textContent = "— (running now)";
    nextEl.title = formatAbsolute(lh.next_run_at);
  } else if (lh.is_overdue) {
    nextEl.innerHTML = `<span class="text-warning">due now</span>`;
    nextEl.title = formatAbsolute(lh.next_run_at);
  } else {
    nextEl.textContent = relativeTime(lh.next_run_at, serverNow);
    nextEl.title = formatAbsolute(lh.next_run_at);
  }
}

// Disable/label the Recrawl button to reflect the crawl lifecycle.
function updateRecrawlButton(data) {
  const btn = document.getElementById("recrawl-btn");
  if (!btn) return;
  const state = data.crawler.state;
  // "overdue + idle" means the user already requested a recrawl but the
  // scheduler hasn't picked it up yet (up to ~30s).
  const waitingForScheduler = state === "idle" && data.crawler.is_overdue;
  const busy =
    state === "queued" || state === "running" || waitingForScheduler;
  btn.disabled = busy;
  const label = btn.querySelector(".recrawl-btn-label");
  const spinner = btn.querySelector(".recrawl-btn-spinner");
  if (busy) {
    spinner.classList.remove("d-none");
    if (state === "running") {
      const n = data.crawler.pages_count || 0;
      label.textContent = n > 0 ? `Crawling (${n})` : "Crawling…";
    } else if (state === "queued") {
      label.textContent = "Queued…";
    } else {
      label.textContent = "Waiting for scheduler…";
    }
  } else {
    spinner.classList.add("d-none");
    label.textContent = "Recrawl";
  }
}

// Disable/label the Rerun-Lighthouse button; mirrors updateRecrawlButton.
function updateRerunLighthouseButton(data) {
  const btn = document.getElementById("rerun-lighthouse-btn");
  if (!btn) return;
  const state = data.lighthouse.state;
  const waitingForScheduler = state === "idle" && data.lighthouse.is_overdue;
  const busy =
    state === "queued" || state === "running" || waitingForScheduler;
  btn.disabled = busy;
  const label = btn.querySelector(".rerun-lh-label");
  const spinner = btn.querySelector(".rerun-lh-spinner");
  if (busy) {
    spinner.classList.remove("d-none");
    if (state === "running") label.textContent = "Running";
    else if (state === "queued") label.textContent = "Queued";
    else label.textContent = "Waiting…";
  } else {
    spinner.classList.add("d-none");
    label.textContent = "Rerun";
  }
}

// Read Django's CSRF token from the hidden form rendered in the template.
function getCsrfToken() {
  const input = document.querySelector("input[name=csrfmiddlewaretoken]");
  return input ? input.value : "";
}

// POST to `url` with the CSRF token; on a 2xx JSON response, pass the
// parsed body to `onDone`. Failures are logged, not surfaced to the user.
async function triggerPost(url, onDone) {
  try {
    const res = await fetch(url, {
      method: "POST",
      headers: {
        "X-CSRFToken": getCsrfToken(),
        "Accept": "application/json",
      },
      credentials: "same-origin",
    });
    if (!res.ok) {
      console.error("POST failed", url, res.status);
      return;
    }
    const data = await res.json();
    if (onDone) onDone(data);
  } catch (err) {
    console.error("POST error", url, err);
  }
}

document.addEventListener("DOMContentLoaded", function () {
  const root = document.getElementById("monitoring-status");
  if (!root) return;
  // Endpoint URLs are provided by the template via data-* attributes.
  const statusUrl = root.dataset.statusUrl;
  const recrawlUrl = root.dataset.recrawlUrl;
  const rerunLighthouseUrl = root.dataset.rerunLighthouseUrl;
  let prevCrawlState = null;
  let prevLhState = null;
  let timer = null;

  // Re-arm the poll timer: fast while anything is active or overdue,
  // slow otherwise. clearTimeout first so only one timer is pending.
  function schedule(data) {
    const active =
      data.crawler.state !== "idle" ||
      data.lighthouse.state !== "idle" ||
      data.crawler.is_overdue ||
      data.lighthouse.is_overdue;
    const delay = active ? FAST_POLL_MS : SLOW_POLL_MS;
    clearTimeout(timer);
    timer = setTimeout(poll, delay);
  }

  // Render a status payload, then either reload (run just finished) or
  // schedule the next poll. Uses server_time as "now" to avoid client
  // clock skew in the relative timestamps.
  function applyData(data) {
    const serverNow = data.server_time ? new Date(data.server_time).getTime() : Date.now();
    renderCrawler(root, data.crawler, serverNow);
    renderLighthouse(root, data.lighthouse, serverNow);
    updateRecrawlButton(data);
    updateRerunLighthouseButton(data);
    // If either subsystem just went idle after being active, refresh the
    // page once so server-rendered charts/insights update.
    const crawlerFinished =
      prevCrawlState && prevCrawlState !== "idle" && data.crawler.state === "idle";
    const lhFinished =
      prevLhState && prevLhState !== "idle" && data.lighthouse.state === "idle";
    prevCrawlState = data.crawler.state;
    prevLhState = data.lighthouse.state;
    if (crawlerFinished || lhFinished) {
      window.location.reload();
      return;
    }
    schedule(data);
  }

  // Fetch the status JSON; on any failure, retry at the slow interval.
  async function poll() {
    try {
      const res = await fetch(statusUrl, {
        credentials: "same-origin",
        headers: { Accept: "application/json" },
      });
      if (!res.ok) {
        timer = setTimeout(poll, SLOW_POLL_MS);
        return;
      }
      const data = await res.json();
      applyData(data);
    } catch (err) {
      console.error("status poll failed", err);
      timer = setTimeout(poll, SLOW_POLL_MS);
    }
  }

  // Wire the action buttons; the POST response is a status payload, so
  // applyData updates the panel immediately without waiting for a poll.
  const recrawlBtn = document.getElementById("recrawl-btn");
  if (recrawlBtn && recrawlUrl) {
    recrawlBtn.addEventListener("click", function () {
      recrawlBtn.disabled = true;
      triggerPost(recrawlUrl, function (data) {
        applyData(data);
      });
    });
  }

  const rerunLhBtn = document.getElementById("rerun-lighthouse-btn");
  if (rerunLhBtn && rerunLighthouseUrl) {
    rerunLhBtn.addEventListener("click", function () {
      rerunLhBtn.disabled = true;
      triggerPost(rerunLighthouseUrl, function (data) {
        applyData(data);
      });
    });
  }

  poll();
});
modified properties/templates/properties/property.html
@@ -53,9 +53,10 @@          <a href="{% url 'property' property.id %}?report" target="_blank" class="btn btn-sm btn-primary ms-0 ms-lg-3 my-1">            Report          </a>          <a href="{% url 'property' property.id %}?recrawl" class="btn btn-sm btn-primary ms-0 ms-lg-3 my-1">            Recrawl          </a>          <button type="button" id="recrawl-btn" class="btn btn-sm btn-primary ms-0 ms-lg-3 my-1" data-property-id="{{ property.id }}">            <span class="recrawl-btn-label">Recrawl</span>            <span class="recrawl-btn-spinner spinner-border spinner-border-sm ms-1 d-none" role="status" aria-hidden="true"></span>          </button>          {% endif %}          {% if not property.is_protected and user.is_authenticated %}          <button type="button" class="btn btn-sm btn-outline-danger ms-1 ms-lg-3 my-1" data-bs-toggle="modal" data-bs-target="#delete-modal-{{ property.id }}">
@@ -129,14 +130,79 @@</div>{% endif %}{% if property.last_lighthouse_error and user.is_authenticated %}<div class="container-fluid mb-4">  <div class="alert alert-warning mb-0" role="alert">    <strong>Last Lighthouse run failed:</strong>    <code>{{ property.last_lighthouse_error }}</code>    {% if property.last_lighthouse_run_at %}      <span class="text-muted small d-block">attempted {{ property.last_lighthouse_run_at|timesince }} ago</span>    {% endif %}{% if user.is_authenticated %}<form style="display:none">{% csrf_token %}</form><div class="container mt-4 d-print-none" id="monitoring-status" data-property-id="{{ property.id }}" data-status-url="{% url 'property_status' property.id %}" data-recrawl-url="{% url 'property_recrawl' property.id %}" data-rerun-lighthouse-url="{% url 'property_rerun_lighthouse' property.id %}">  <div class="row g-3">    <div class="col-12 col-md-6">      <div class="card h-100">        <div class="card-header bg-dark text-white d-flex justify-content-between align-items-center">          <strong>Crawler</strong>          <span class="badge" data-field="crawler.state_badge">&nbsp;</span>        </div>        <div class="card-body">          <div class="progress mb-3 d-none" data-field="crawler.progress_wrap" style="height: 6px;">            <div class="progress-bar progress-bar-striped progress-bar-animated" role="progressbar" data-field="crawler.progress_bar" style="width: 0%"></div>          </div>          <div class="alert alert-danger py-2 mb-3 d-none small" data-field="crawler.error_box" role="alert">            <strong>Last crawl failed:</strong>            <code data-field="crawler.error_text"></code>          </div>          <dl class="row small mb-0">            <dt class="col-5 col-sm-5 text-muted">Last success</dt>            <dd class="col-7 col-sm-7 mb-1" data-field="crawler.last_success">—</dd>            <dt class="col-5 col-sm-5 text-muted">Last attempt</dt>            <dd class="col-7 col-sm-7 mb-1" 
data-field="crawler.last_attempt">—</dd>            <dt class="col-5 col-sm-5 text-muted">Pages crawled</dt>            <dd class="col-7 col-sm-7 mb-1" data-field="crawler.pages">—</dd>            <dt class="col-5 col-sm-5 text-muted">Duration</dt>            <dd class="col-7 col-sm-7 mb-1" data-field="crawler.duration">—</dd>            <dt class="col-5 col-sm-5 text-muted">Issues found</dt>            <dd class="col-7 col-sm-7 mb-1" data-field="crawler.insights">—</dd>            <dt class="col-5 col-sm-5 text-muted">Next run</dt>            <dd class="col-7 col-sm-7 mb-0" data-field="crawler.next_run">—</dd>          </dl>        </div>      </div>    </div>    <div class="col-12 col-md-6">      <div class="card h-100">        <div class="card-header bg-dark text-white d-flex justify-content-between align-items-center">          <strong>Lighthouse</strong>          <span class="d-flex align-items-center gap-2">            <span class="badge" data-field="lighthouse.state_badge">&nbsp;</span>            <button type="button" id="rerun-lighthouse-btn" class="btn btn-sm btn-outline-light py-0" title="Rerun Lighthouse now">              <span class="rerun-lh-label">Rerun</span>              <span class="rerun-lh-spinner spinner-border spinner-border-sm ms-1 d-none" role="status" aria-hidden="true"></span>            </button>          </span>        </div>        <div class="card-body">          <div class="alert alert-warning py-2 mb-3 d-none small" data-field="lighthouse.error_box" role="alert">            <strong>Last Lighthouse run failed:</strong>            <code data-field="lighthouse.error_text"></code>          </div>          <dl class="row small mb-0">            <dt class="col-5 col-sm-5 text-muted">Last success</dt>            <dd class="col-7 col-sm-7 mb-1" data-field="lighthouse.last_success">—</dd>            <dt class="col-5 col-sm-5 text-muted">Last attempt</dt>            <dd class="col-7 col-sm-7 mb-1" data-field="lighthouse.last_attempt">—</dd>   
         <dt class="col-5 col-sm-5 text-muted">Duration</dt>            <dd class="col-7 col-sm-7 mb-1" data-field="lighthouse.duration">—</dd>            <dt class="col-5 col-sm-5 text-muted">Next run</dt>            <dd class="col-7 col-sm-7 mb-0" data-field="lighthouse.next_run">—</dd>          </dl>        </div>      </div>    </div>  </div></div>{% endif %}
modified properties/urls.py
@@ -7,6 +7,9 @@ urlpatterns = [    path('<uuid:property_id>/', views.property, name='property'),    path('<uuid:property_id>/delete/', views.property_delete, name='property_delete'),    path('<uuid:property_id>/is-public/', views.adjust_is_public_property, name='adjust_is_public_property'),    path('<uuid:property_id>/status/', views.property_status, name='property_status'),    path('<uuid:property_id>/recrawl/', views.property_recrawl, name='property_recrawl'),    path('<uuid:property_id>/rerun-lighthouse/', views.property_rerun_lighthouse, name='property_rerun_lighthouse'),    path('import/', views.import_properties, name='import_properties'),    path('', views.properties, name='properties'),]
modified properties/views.py
@@ -104,12 +104,6 @@ def property(request, property_id):    if not property_obj.is_public and property_obj.user != request.user:        return redirect("properties")    if property_obj.user == request.user and request.GET.get('recrawl') == '':        property_obj.next_run_at_crawler = timezone.now()        property_obj.save()        messages.success(request, "This property will be recrawled shortly.")        return redirect("property", property_id=property_id)    # Set some basic page context variables    context["title"] = property_obj.name    context["description"] = "Status for " + property_obj.name
@@ -158,6 +152,139 @@ def property(request, property_id):    return render(request, "properties/property.html", context)def _crawl_progress(property_obj):    """Return the fraction (0-1) of the discovered work that's complete."""    from crawler.fetcher import PAGE_CAP    pages = property_obj.last_crawl_pages_count or 0    if pages <= 0:        return 0.05  # show *some* movement once we start    # We don't know the total ahead of time, so use a log-ish ratio capped at    # ~90% — the last 10% is reserved for post-crawl check processing.    return min(pages / PAGE_CAP, 0.9)def _serialize_status(property_obj):    now = timezone.now()    crawl_next = property_obj.next_run_at_crawler    lh_next = property_obj.next_lighthouse_run_at    insights = property_obj.crawler_insights or []    severity_counts = {"error": 0, "warning": 0, "info": 0}    for insight in insights:        sev = insight.get("severity", "info")        if sev in severity_counts:            severity_counts[sev] += 1    return {        "crawler": {            "state": property_obj.crawl_state,            "started_at": property_obj.crawl_started_at.isoformat()            if property_obj.crawl_started_at            else None,            "last_attempt_at": property_obj.last_run_at_crawler.isoformat()            if property_obj.last_run_at_crawler            else None,            "last_success_at": property_obj.last_crawl_success_at.isoformat()            if property_obj.last_crawl_success_at            else None,            "last_error": property_obj.last_crawl_error,            "last_duration_ms": property_obj.last_crawl_duration_ms,            "pages_count": property_obj.last_crawl_pages_count,            "next_run_at": crawl_next.isoformat() if crawl_next else None,            "is_overdue": bool(crawl_next and crawl_next <= now),            "insights_total": len(insights),            "insights_by_severity": severity_counts,            "progress": _crawl_progress(property_obj)            if 
property_obj.crawl_state == "running"            else None,        },        "lighthouse": {            "state": property_obj.lighthouse_state,            "started_at": property_obj.lighthouse_started_at.isoformat()            if property_obj.lighthouse_started_at            else None,            "last_attempt_at": property_obj.last_lighthouse_run_at.isoformat()            if property_obj.last_lighthouse_run_at            else None,            "last_success_at": property_obj.last_lighthouse_success_at.isoformat()            if property_obj.last_lighthouse_success_at            else None,            "last_error": property_obj.last_lighthouse_error,            "last_duration_ms": property_obj.last_lighthouse_duration_ms,            "next_run_at": lh_next.isoformat() if lh_next else None,            "is_overdue": bool(lh_next and lh_next <= now),            "scores": property_obj.lighthouse_scores,        },        "server_time": now.isoformat(),    }def property_status(request, property_id):    try:        property_obj = Property.objects.get(pk=property_id)    except Property.DoesNotExist:        return JsonResponse({"error": "not_found"}, status=404)    if not property_obj.is_public and property_obj.user != request.user:        return JsonResponse({"error": "forbidden"}, status=403)    return JsonResponse(_serialize_status(property_obj))def property_recrawl(request, property_id):    if not request.user.is_authenticated:        return JsonResponse({"error": "forbidden"}, status=403)    if request.method != "POST":        return JsonResponse({"error": "method_not_allowed"}, status=405)    try:        property_obj = request.user.properties.get(pk=property_id)    except Property.DoesNotExist:        return JsonResponse({"error": "not_found"}, status=404)    if property_obj.crawl_state in ("queued", "running"):        return JsonResponse(            {                "ok": False,                "reason": "already_running",                **_serialize_status(property_obj), 
           }        )    property_obj.next_run_at_crawler = timezone.now()    property_obj.save(update_fields=["next_run_at_crawler"])    return JsonResponse({"ok": True, **_serialize_status(property_obj)})def property_rerun_lighthouse(request, property_id):    if not request.user.is_authenticated:        return JsonResponse({"error": "forbidden"}, status=403)    if request.method != "POST":        return JsonResponse({"error": "method_not_allowed"}, status=405)    try:        property_obj = request.user.properties.get(pk=property_id)    except Property.DoesNotExist:        return JsonResponse({"error": "not_found"}, status=404)    if property_obj.lighthouse_state in ("queued", "running"):        return JsonResponse(            {                "ok": False,                "reason": "already_running",                **_serialize_status(property_obj),            }        )    property_obj.next_lighthouse_run_at = timezone.now()    property_obj.save(update_fields=["next_lighthouse_run_at"])    return JsonResponse({"ok": True, **_serialize_status(property_obj)})def import_property(request, url):    url = url.lower().strip()    if not url.startswith("http"):