modified
crawler/runner.py
@@ -43,8 +43,12 @@ def _normalize_url(url): return cleaned.rstrip("/") or cleaneddef crawl(start_url): """Fetch up to PAGE_CAP pages from the same host and collect metadata."""def crawl(start_url, progress_cb=None): """Fetch up to PAGE_CAP pages from the same host and collect metadata. `progress_cb(pages_count)` is invoked after each batch so callers can surface live progress without blocking the crawl. """ session = make_session() parsed = urlparse(start_url)
@@ -123,6 +127,12 @@ def crawl(start_url): seen.add(_normalize_url(r.url)) pages.append(page) if progress_cb is not None: try: progress_cb(len(pages)) except Exception: logger.exception("[crawler] progress callback failed") if time.time() >= deadline: logger.warning( "[crawler] hit deadline for %s after %d pages",
@@ -207,11 +217,11 @@ def _write_debug_output(crawl_result): logger.exception("[crawler] failed writing debug output to %s", path)def run_seo_spider(url):def run_seo_spider(url, progress_cb=None): """Crawl `url`, write debug output, return list of insight dicts.""" start = time.time() logger.info("[crawler] starting %s", url) result = crawl(url) result = crawl(url, progress_cb=progress_cb) insights = run_checks(result) _write_debug_output(result) logger.info(
modified
properties/management/commands/scheduler.py
@@ -23,6 +23,34 @@ class Command(BaseCommand): Check.objects.filter(created_at__lt=timezone.now() - timezone.timedelta(days=3)).delete() self.stdout.write("[Scheduler] Cleaned checks older than 3 days.") def reset_wedged_states(self): """Flip stale running/queued states back to idle. Runs on startup (catches states left over from a crashed scheduler) and each cycle (catches threads that overran JOIN_TIMEOUT). """ now = timezone.now() crawl_cutoff = now - timezone.timedelta(seconds=900) lh_cutoff = now - timezone.timedelta(seconds=300) Property.objects.filter( crawl_state__in=["queued", "running"], ).filter( Q(crawl_started_at__isnull=True) | Q(crawl_started_at__lt=crawl_cutoff) ).update( crawl_state="idle", last_crawl_error="Crawl timed out or was interrupted", ) Property.objects.filter( lighthouse_state__in=["queued", "running"], ).filter( Q(lighthouse_started_at__isnull=True) | Q(lighthouse_started_at__lt=lh_cutoff) ).update( lighthouse_state="idle", last_lighthouse_error="Lighthouse run timed out or was interrupted", ) def thread_target(self, property_id): property = Property.objects.get(id=property_id) self.stdout.write("[Scheduler] Checking status {}".format(property.url))
@@ -115,12 +143,17 @@ class Command(BaseCommand): Q(last_lighthouse_run_at__isnull=True) | Q(next_lighthouse_run_at__isnull=True) | Q(next_lighthouse_run_at__lte=now) ) ).exclude(lighthouse_state__in=["queued", "running"]) properties = list(due) for p in properties: p.next_lighthouse_run_at = p.get_next_run_at_lighthouse() p.last_lighthouse_run_at = timezone.now() p.save(update_fields=["next_lighthouse_run_at", "last_lighthouse_run_at"]) p.lighthouse_state = "queued" p.save(update_fields=[ "next_lighthouse_run_at", "last_lighthouse_run_at", "lighthouse_state", ]) properties = [p.id for p in properties] db.connections.close_all()
@@ -133,12 +166,17 @@ class Command(BaseCommand): Q(last_run_at_crawler__isnull=True) | Q(next_run_at_crawler__isnull=True) | Q(next_run_at_crawler__lte=now) ) ).exclude(crawl_state__in=["queued", "running"]) properties = list(due) for p in properties: p.next_run_at_crawler = p.get_next_run_at_crawl() p.last_run_at_crawler = timezone.now() p.save(update_fields=["next_run_at_crawler", "last_run_at_crawler"]) p.crawl_state = "queued" p.save(update_fields=[ "next_run_at_crawler", "last_run_at_crawler", "crawl_state", ]) properties = [p.id for p in properties] db.connections.close_all()
@@ -148,6 +186,15 @@ class Command(BaseCommand): def handle(self, *args, **options): self.stdout.write("[Scheduler] Starting scheduler...") # Clear any running/queued states left over from a prior crash so # that rows don't sit stuck and block new runs. Property.objects.filter(crawl_state__in=["queued", "running"]).update( crawl_state="idle" ) Property.objects.filter(lighthouse_state__in=["queued", "running"]).update( lighthouse_state="idle" ) # Start queue_process thread t = threading.Thread(target=self.queue_process) t.daemon = True
@@ -163,6 +210,7 @@ class Command(BaseCommand): self.queue_check_status() self.queue_check_lighthouse() self.queue_check_crawler() self.reset_wedged_states() self.clean_checks() self.stdout.write("[Scheduler] Sleeping scheduler for 30 seconds...")
added
properties/migrations/0010_crawl_lighthouse_state_tracking.py
@@ -0,0 +1,74 @@
# Generated by Django 6.0.4 on 2026-04-16 23:39

from django.db import migrations, models


class Migration(migrations.Migration):
    # Adds crawl/lighthouse run-state tracking fields to Property so the
    # scheduler and status endpoint can report live progress and failures.

    dependencies = [
        ("properties", "0009_check_properties__propert_b3e3ac_idx"),
    ]

    operations = [
        # --- Crawl tracking -------------------------------------------------
        migrations.AddField(
            model_name="property",
            name="crawl_started_at",
            field=models.DateTimeField(blank=True, null=True),
        ),
        migrations.AddField(
            model_name="property",
            name="crawl_state",
            # Choices mirror the CharField declared on the Property model.
            field=models.CharField(
                choices=[
                    ("idle", "Idle"),
                    ("queued", "Queued"),
                    ("running", "Running"),
                ],
                default="idle",
                max_length=10,
            ),
        ),
        migrations.AddField(
            model_name="property",
            name="last_crawl_duration_ms",
            field=models.IntegerField(blank=True, null=True),
        ),
        migrations.AddField(
            model_name="property",
            name="last_crawl_error",
            field=models.TextField(blank=True, null=True),
        ),
        migrations.AddField(
            model_name="property",
            name="last_crawl_pages_count",
            field=models.IntegerField(blank=True, null=True),
        ),
        migrations.AddField(
            model_name="property",
            name="last_crawl_success_at",
            field=models.DateTimeField(blank=True, null=True),
        ),
        # --- Lighthouse tracking --------------------------------------------
        migrations.AddField(
            model_name="property",
            name="last_lighthouse_duration_ms",
            field=models.IntegerField(blank=True, null=True),
        ),
        migrations.AddField(
            model_name="property",
            name="lighthouse_started_at",
            field=models.DateTimeField(blank=True, null=True),
        ),
        migrations.AddField(
            model_name="property",
            name="lighthouse_state",
            field=models.CharField(
                choices=[
                    ("idle", "Idle"),
                    ("queued", "Queued"),
                    ("running", "Running"),
                ],
                default="idle",
                max_length=10,
            ),
        ),
    ]
modified
properties/models.py
@@ -1,4 +1,5 @@import reimport timeimport uuidimport logging
@@ -217,10 +218,34 @@ class CrawlerMixin: return True return self.next_run_at_crawler <= now def _report_crawl_progress(self, pages_count): Property.objects.filter(pk=self.pk).update(last_crawl_pages_count=pages_count) def crawl_site(self): insights = run_seo_spider(self.url) self.crawler_insights = insights self.save(update_fields=["crawler_insights"]) Property.objects.filter(pk=self.pk).update( crawl_state="running", crawl_started_at=timezone.now(), last_crawl_pages_count=0, ) start = time.monotonic() try: insights = run_seo_spider(self.url, progress_cb=self._report_crawl_progress) except Exception as e: logger.exception("Crawl failed for %s", self.url) Property.objects.filter(pk=self.pk).update( crawl_state="idle", last_crawl_error=f"{type(e).__name__}: {e}", last_crawl_duration_ms=int((time.monotonic() - start) * 1000), ) return duration_ms = int((time.monotonic() - start) * 1000) Property.objects.filter(pk=self.pk).update( crawler_insights=insights, crawl_state="idle", last_crawl_success_at=timezone.now(), last_crawl_error=None, last_crawl_duration_ms=duration_ms, )class Property(CrawlerMixin, AlertsMixin, SecurityMixin, models.Model):
@@ -237,12 +262,29 @@ class Property(CrawlerMixin, AlertsMixin, SecurityMixin, models.Model): last_run_at_crawler = models.DateTimeField(blank=True, null=True) next_run_at_crawler = models.DateTimeField(blank=True, null=True) crawler_insights = models.JSONField(blank=True, null=True) crawl_state = models.CharField( max_length=10, choices=[("idle", "Idle"), ("queued", "Queued"), ("running", "Running")], default="idle", ) crawl_started_at = models.DateTimeField(blank=True, null=True) last_crawl_success_at = models.DateTimeField(blank=True, null=True) last_crawl_error = models.TextField(blank=True, null=True) last_crawl_duration_ms = models.IntegerField(blank=True, null=True) last_crawl_pages_count = models.IntegerField(blank=True, null=True) lighthouse_scores = models.JSONField(blank=True, null=True) last_lighthouse_run_at = models.DateTimeField(blank=True, null=True) last_lighthouse_success_at = models.DateTimeField(blank=True, null=True) last_lighthouse_error = models.TextField(blank=True, null=True) last_lighthouse_duration_ms = models.IntegerField(blank=True, null=True) next_lighthouse_run_at = models.DateTimeField(blank=True, null=True) lighthouse_state = models.CharField( max_length=10, choices=[("idle", "Idle"), ("queued", "Queued"), ("running", "Running")], default="idle", ) lighthouse_started_at = models.DateTimeField(blank=True, null=True) # Alert state tracking last_alert_sent = models.DateTimeField(blank=True, null=True)
@@ -332,29 +374,37 @@ class Property(CrawlerMixin, AlertsMixin, SecurityMixin, models.Model): self.run_check_lighthouse() def run_check_lighthouse(self): Property.objects.filter(pk=self.pk).update( lighthouse_state="running", lighthouse_started_at=timezone.now(), ) start = time.monotonic() try: results = fetch_lighthouse_results(self.url) scores = parse_lighthouse_results(results) except LighthouseError as e: logger.warning("Lighthouse failed for %s: %s", self.url, e) self.last_lighthouse_error = str(e) self.save(update_fields=["last_lighthouse_error"]) Property.objects.filter(pk=self.pk).update( lighthouse_state="idle", last_lighthouse_error=str(e), last_lighthouse_duration_ms=int((time.monotonic() - start) * 1000), ) return except Exception as e: logger.exception("Unexpected lighthouse error for %s", self.url) self.last_lighthouse_error = f"{type(e).__name__}: {e}" self.save(update_fields=["last_lighthouse_error"]) Property.objects.filter(pk=self.pk).update( lighthouse_state="idle", last_lighthouse_error=f"{type(e).__name__}: {e}", last_lighthouse_duration_ms=int((time.monotonic() - start) * 1000), ) return self.lighthouse_scores = scores self.last_lighthouse_success_at = timezone.now() self.last_lighthouse_error = None self.save( update_fields=[ "lighthouse_scores", "last_lighthouse_success_at", "last_lighthouse_error", ] Property.objects.filter(pk=self.pk).update( lighthouse_scores=scores, last_lighthouse_success_at=timezone.now(), last_lighthouse_error=None, last_lighthouse_duration_ms=int((time.monotonic() - start) * 1000), lighthouse_state="idle", ) @property
modified
properties/static_src/index.js
@@ -1,4 +1,5 @@import "./scripts/property_graphs.js";import "./scripts/property_is_public.js";import "./scripts/property_crawl_status.js";import "./styles/print.scss";
added
properties/static_src/scripts/property_crawl_status.js
@@ -0,0 +1,326 @@
// Polls the crawl/lighthouse status endpoint and updates the monitoring
// panel in-place. When a crawl or lighthouse run is active, polling is
// fast (2s); when idle, it's slow (30s) so older tabs don't hammer the
// server.

const FAST_POLL_MS = 2000;
const SLOW_POLL_MS = 30000;

// NOTE(review): `$` appears unused in this module — everything below calls
// root.querySelector directly. Candidate for removal; confirm no other
// bundle entry relies on it.
function $(root, selector) {
  return root.querySelector(selector);
}

// Write `value` into the element tagged data-field="<field>", if present.
function setText(root, field, value) {
  const el = root.querySelector(`[data-field="${field}"]`);
  if (el) el.textContent = value;
}

// Toggle visibility of the element tagged data-field="<field>" via
// Bootstrap's `d-none` utility class.
function show(root, field, visible) {
  const el = root.querySelector(`[data-field="${field}"]`);
  if (!el) return;
  el.classList.toggle("d-none", !visible);
}

// Format a millisecond duration for display; "—" when unknown/null.
function humanDuration(ms) {
  if (ms == null) return "—";
  if (ms < 1000) return `${ms} ms`;
  const s = ms / 1000;
  if (s < 60) return `${s.toFixed(1)} s`;
  const m = Math.floor(s / 60);
  const rs = Math.round(s - m * 60);
  return `${m}m ${rs}s`;
}

// Render an ISO timestamp relative to `now` (epoch ms), e.g. "5m ago" or
// "in 2h". Returns null for a missing timestamp.
function relativeTime(iso, now) {
  if (!iso) return null;
  const then = new Date(iso).getTime();
  const diff = now - then;
  const future = diff < 0;
  const abs = Math.abs(diff);
  const s = Math.round(abs / 1000);
  let text;
  if (s < 45) text = `${s}s`;
  else if (s < 3600) text = `${Math.round(s / 60)}m`;
  else if (s < 86400) text = `${Math.round(s / 3600)}h`;
  else text = `${Math.round(s / 86400)}d`;
  return future ? `in ${text}` : `${text} ago`;
}

// Absolute, locale-formatted timestamp (used for tooltips).
function formatAbsolute(iso) {
  if (!iso) return "";
  const d = new Date(iso);
  return d.toLocaleString();
}

// Map a crawl/lighthouse state string to its badge label + Bootstrap class.
function stateBadge(state) {
  switch (state) {
    case "running":
      return { label: "Running", cls: "bg-primary" };
    case "queued":
      return { label: "Queued", cls: "bg-info text-dark" };
    default:
      return { label: "Idle", cls: "bg-secondary" };
  }
}

// Write a relative timestamp into <field>, with the absolute time as the
// tooltip (title attribute); "—" when there is no timestamp.
function renderWhen(root, field, iso, now) {
  const el = root.querySelector(`[data-field="${field}"]`);
  if (!el) return;
  if (!iso) {
    el.textContent = "—";
    el.removeAttribute("title");
    return;
  }
  const rel = relativeTime(iso, now);
  el.textContent = rel;
  el.title = formatAbsolute(iso);
}

// Update the crawler card: state badge, progress bar, error box,
// timestamps, page count, duration, insight counts and next-run cell.
function renderCrawler(root, crawler, serverNow) {
  const badge = root.querySelector('[data-field="crawler.state_badge"]');
  const s = stateBadge(crawler.state);
  badge.className = `badge ${s.cls}`;
  badge.textContent = s.label;
  show(root, "crawler.progress_wrap", crawler.state === "running");
  if (crawler.state === "running") {
    const bar = root.querySelector('[data-field="crawler.progress_bar"]');
    // `progress` arrives as a 0-1 fraction from the server.
    const pct = Math.round((crawler.progress || 0) * 100);
    bar.style.width = `${pct}%`;
    bar.setAttribute("aria-valuenow", pct);
  }
  show(root, "crawler.error_box", !!crawler.last_error);
  if (crawler.last_error) {
    setText(root, "crawler.error_text", crawler.last_error);
  }
  renderWhen(root, "crawler.last_success", crawler.last_success_at, serverNow);
  renderWhen(root, "crawler.last_attempt", crawler.last_attempt_at, serverNow);
  const pagesEl = root.querySelector('[data-field="crawler.pages"]');
  if (crawler.state === "running") {
    pagesEl.textContent = `${crawler.pages_count || 0} so far…`;
  } else if (crawler.pages_count != null) {
    pagesEl.textContent = `${crawler.pages_count}`;
  } else {
    pagesEl.textContent = "—";
  }
  setText(root, "crawler.duration", humanDuration(crawler.last_duration_ms));
  const ins = crawler.insights_by_severity || { error: 0, warning: 0, info: 0 };
  const insEl = root.querySelector('[data-field="crawler.insights"]');
  insEl.innerHTML = ` <span class="badge bg-danger me-1">${ins.error} err</span> <span class="badge bg-warning text-dark me-1">${ins.warning} warn</span> <span class="badge bg-info text-dark">${ins.info} info</span> `;
  const nextEl = root.querySelector('[data-field="crawler.next_run"]');
  if (!crawler.next_run_at) {
    nextEl.textContent = "—";
    nextEl.removeAttribute("title");
  } else if (crawler.state === "running" || crawler.state === "queued") {
    nextEl.textContent = "— (running now)";
    nextEl.title = formatAbsolute(crawler.next_run_at);
  } else if (crawler.is_overdue) {
    nextEl.innerHTML = `<span class="text-warning">due now</span>`;
    nextEl.title = formatAbsolute(crawler.next_run_at);
  } else {
    nextEl.textContent = relativeTime(crawler.next_run_at, serverNow);
    nextEl.title = formatAbsolute(crawler.next_run_at);
  }
}

// Update the Lighthouse card (same layout as the crawler card, minus
// progress/pages).
function renderLighthouse(root, lh, serverNow) {
  const badge = root.querySelector('[data-field="lighthouse.state_badge"]');
  const s = stateBadge(lh.state);
  badge.className = `badge ${s.cls}`;
  badge.textContent = s.label;
  show(root, "lighthouse.error_box", !!lh.last_error);
  if (lh.last_error) {
    setText(root, "lighthouse.error_text", lh.last_error);
  }
  renderWhen(root, "lighthouse.last_success", lh.last_success_at, serverNow);
  renderWhen(root, "lighthouse.last_attempt", lh.last_attempt_at, serverNow);
  setText(root, "lighthouse.duration", humanDuration(lh.last_duration_ms));
  const nextEl = root.querySelector('[data-field="lighthouse.next_run"]');
  if (!lh.next_run_at) {
    nextEl.textContent = "—";
    nextEl.removeAttribute("title");
  } else if (lh.state === "running" || lh.state === "queued") {
    nextEl.textContent = "— (running now)";
    nextEl.title = formatAbsolute(lh.next_run_at);
  } else if (lh.is_overdue) {
    nextEl.innerHTML = `<span class="text-warning">due now</span>`;
    nextEl.title = formatAbsolute(lh.next_run_at);
  } else {
    nextEl.textContent = relativeTime(lh.next_run_at, serverNow);
    nextEl.title = formatAbsolute(lh.next_run_at);
  }
}

// Enable/disable the Recrawl button and swap its label/spinner to reflect
// the crawl state reported by the server.
function updateRecrawlButton(data) {
  const btn = document.getElementById("recrawl-btn");
  if (!btn) return;
  const state = data.crawler.state;
  // "overdue + idle" means the user already requested a recrawl but the
  // scheduler hasn't picked it up yet (up to ~30s).
  const waitingForScheduler = state === "idle" && data.crawler.is_overdue;
  const busy = state === "queued" || state === "running" || waitingForScheduler;
  btn.disabled = busy;
  const label = btn.querySelector(".recrawl-btn-label");
  const spinner = btn.querySelector(".recrawl-btn-spinner");
  if (busy) {
    spinner.classList.remove("d-none");
    if (state === "running") {
      const n = data.crawler.pages_count || 0;
      label.textContent = n > 0 ? `Crawling (${n})` : "Crawling…";
    } else if (state === "queued") {
      label.textContent = "Queued…";
    } else {
      label.textContent = "Waiting for scheduler…";
    }
  } else {
    spinner.classList.add("d-none");
    label.textContent = "Recrawl";
  }
}

// Same treatment for the Lighthouse rerun button.
function updateRerunLighthouseButton(data) {
  const btn = document.getElementById("rerun-lighthouse-btn");
  if (!btn) return;
  const state = data.lighthouse.state;
  const waitingForScheduler = state === "idle" && data.lighthouse.is_overdue;
  const busy = state === "queued" || state === "running" || waitingForScheduler;
  btn.disabled = busy;
  const label = btn.querySelector(".rerun-lh-label");
  const spinner = btn.querySelector(".rerun-lh-spinner");
  if (busy) {
    spinner.classList.remove("d-none");
    if (state === "running") label.textContent = "Running";
    else if (state === "queued") label.textContent = "Queued";
    else label.textContent = "Waiting…";
  } else {
    spinner.classList.add("d-none");
    label.textContent = "Rerun";
  }
}

// Read Django's CSRF token from the hidden form rendered in the template.
function getCsrfToken() {
  const input = document.querySelector("input[name=csrfmiddlewaretoken]");
  return input ? input.value : "";
}

// POST to `url` with the CSRF header; on a 2xx JSON response, hand the
// parsed body to `onDone`. Failures are logged, not surfaced — the next
// status poll re-syncs the UI.
async function triggerPost(url, onDone) {
  try {
    const res = await fetch(url, {
      method: "POST",
      headers: {
        "X-CSRFToken": getCsrfToken(),
        "Accept": "application/json",
      },
      credentials: "same-origin",
    });
    if (!res.ok) {
      console.error("POST failed", url, res.status);
      return;
    }
    const data = await res.json();
    if (onDone) onDone(data);
  } catch (err) {
    console.error("POST error", url, err);
  }
}

document.addEventListener("DOMContentLoaded", function () {
  const root = document.getElementById("monitoring-status");
  if (!root) return;
  const statusUrl = root.dataset.statusUrl;
  const recrawlUrl = root.dataset.recrawlUrl;
  const rerunLighthouseUrl = root.dataset.rerunLighthouseUrl;
  // Previous states, used to detect an active→idle transition.
  let prevCrawlState = null;
  let prevLhState = null;
  let timer = null;

  // Schedule the next poll: fast while anything is active or overdue,
  // slow otherwise. Clears any pending timer so polls never stack.
  function schedule(data) {
    const active =
      data.crawler.state !== "idle" ||
      data.lighthouse.state !== "idle" ||
      data.crawler.is_overdue ||
      data.lighthouse.is_overdue;
    const delay = active ? FAST_POLL_MS : SLOW_POLL_MS;
    clearTimeout(timer);
    timer = setTimeout(poll, delay);
  }

  // Render a status payload, then either reload (on run completion) or
  // schedule the next poll.
  function applyData(data) {
    // Use the server clock when available so relative times aren't skewed
    // by client clock drift.
    const serverNow = data.server_time ? new Date(data.server_time).getTime() : Date.now();
    renderCrawler(root, data.crawler, serverNow);
    renderLighthouse(root, data.lighthouse, serverNow);
    updateRecrawlButton(data);
    updateRerunLighthouseButton(data);
    // If either subsystem just went idle after being active, refresh the
    // page once so server-rendered charts/insights update.
    const crawlerFinished = prevCrawlState && prevCrawlState !== "idle" && data.crawler.state === "idle";
    const lhFinished = prevLhState && prevLhState !== "idle" && data.lighthouse.state === "idle";
    prevCrawlState = data.crawler.state;
    prevLhState = data.lighthouse.state;
    if (crawlerFinished || lhFinished) {
      window.location.reload();
      return;
    }
    schedule(data);
  }

  // Fetch the status endpoint; on any failure, retry at the slow cadence.
  async function poll() {
    try {
      const res = await fetch(statusUrl, {
        credentials: "same-origin",
        headers: { Accept: "application/json" },
      });
      if (!res.ok) {
        timer = setTimeout(poll, SLOW_POLL_MS);
        return;
      }
      const data = await res.json();
      applyData(data);
    } catch (err) {
      console.error("status poll failed", err);
      timer = setTimeout(poll, SLOW_POLL_MS);
    }
  }

  // Wire up the action buttons; each disables itself immediately and lets
  // the POST response (or the next poll) restore the true state.
  const recrawlBtn = document.getElementById("recrawl-btn");
  if (recrawlBtn && recrawlUrl) {
    recrawlBtn.addEventListener("click", function () {
      recrawlBtn.disabled = true;
      triggerPost(recrawlUrl, function (data) {
        applyData(data);
      });
    });
  }
  const rerunLhBtn = document.getElementById("rerun-lighthouse-btn");
  if (rerunLhBtn && rerunLighthouseUrl) {
    rerunLhBtn.addEventListener("click", function () {
      rerunLhBtn.disabled = true;
      triggerPost(rerunLighthouseUrl, function (data) {
        applyData(data);
      });
    });
  }
  poll();
});
modified
properties/templates/properties/property.html
@@ -53,9 +53,10 @@ <a href="{% url 'property' property.id %}?report" target="_blank" class="btn btn-sm btn-primary ms-0 ms-lg-3 my-1"> Report </a> <a href="{% url 'property' property.id %}?recrawl" class="btn btn-sm btn-primary ms-0 ms-lg-3 my-1"> Recrawl </a> <button type="button" id="recrawl-btn" class="btn btn-sm btn-primary ms-0 ms-lg-3 my-1" data-property-id="{{ property.id }}"> <span class="recrawl-btn-label">Recrawl</span> <span class="recrawl-btn-spinner spinner-border spinner-border-sm ms-1 d-none" role="status" aria-hidden="true"></span> </button> {% endif %} {% if not property.is_protected and user.is_authenticated %} <button type="button" class="btn btn-sm btn-outline-danger ms-1 ms-lg-3 my-1" data-bs-toggle="modal" data-bs-target="#delete-modal-{{ property.id }}">
@@ -129,14 +130,79 @@</div>{% endif %}{% if property.last_lighthouse_error and user.is_authenticated %}<div class="container-fluid mb-4"> <div class="alert alert-warning mb-0" role="alert"> <strong>Last Lighthouse run failed:</strong> <code>{{ property.last_lighthouse_error }}</code> {% if property.last_lighthouse_run_at %} <span class="text-muted small d-block">attempted {{ property.last_lighthouse_run_at|timesince }} ago</span> {% endif %}{% if user.is_authenticated %}<form style="display:none">{% csrf_token %}</form><div class="container mt-4 d-print-none" id="monitoring-status" data-property-id="{{ property.id }}" data-status-url="{% url 'property_status' property.id %}" data-recrawl-url="{% url 'property_recrawl' property.id %}" data-rerun-lighthouse-url="{% url 'property_rerun_lighthouse' property.id %}"> <div class="row g-3"> <div class="col-12 col-md-6"> <div class="card h-100"> <div class="card-header bg-dark text-white d-flex justify-content-between align-items-center"> <strong>Crawler</strong> <span class="badge" data-field="crawler.state_badge"> </span> </div> <div class="card-body"> <div class="progress mb-3 d-none" data-field="crawler.progress_wrap" style="height: 6px;"> <div class="progress-bar progress-bar-striped progress-bar-animated" role="progressbar" data-field="crawler.progress_bar" style="width: 0%"></div> </div> <div class="alert alert-danger py-2 mb-3 d-none small" data-field="crawler.error_box" role="alert"> <strong>Last crawl failed:</strong> <code data-field="crawler.error_text"></code> </div> <dl class="row small mb-0"> <dt class="col-5 col-sm-5 text-muted">Last success</dt> <dd class="col-7 col-sm-7 mb-1" data-field="crawler.last_success">—</dd> <dt class="col-5 col-sm-5 text-muted">Last attempt</dt> <dd class="col-7 col-sm-7 mb-1" data-field="crawler.last_attempt">—</dd> <dt class="col-5 col-sm-5 text-muted">Pages crawled</dt> <dd class="col-7 col-sm-7 mb-1" data-field="crawler.pages">—</dd> <dt class="col-5 col-sm-5 
text-muted">Duration</dt> <dd class="col-7 col-sm-7 mb-1" data-field="crawler.duration">—</dd> <dt class="col-5 col-sm-5 text-muted">Issues found</dt> <dd class="col-7 col-sm-7 mb-1" data-field="crawler.insights">—</dd> <dt class="col-5 col-sm-5 text-muted">Next run</dt> <dd class="col-7 col-sm-7 mb-0" data-field="crawler.next_run">—</dd> </dl> </div> </div> </div> <div class="col-12 col-md-6"> <div class="card h-100"> <div class="card-header bg-dark text-white d-flex justify-content-between align-items-center"> <strong>Lighthouse</strong> <span class="d-flex align-items-center gap-2"> <span class="badge" data-field="lighthouse.state_badge"> </span> <button type="button" id="rerun-lighthouse-btn" class="btn btn-sm btn-outline-light py-0" title="Rerun Lighthouse now"> <span class="rerun-lh-label">Rerun</span> <span class="rerun-lh-spinner spinner-border spinner-border-sm ms-1 d-none" role="status" aria-hidden="true"></span> </button> </span> </div> <div class="card-body"> <div class="alert alert-warning py-2 mb-3 d-none small" data-field="lighthouse.error_box" role="alert"> <strong>Last Lighthouse run failed:</strong> <code data-field="lighthouse.error_text"></code> </div> <dl class="row small mb-0"> <dt class="col-5 col-sm-5 text-muted">Last success</dt> <dd class="col-7 col-sm-7 mb-1" data-field="lighthouse.last_success">—</dd> <dt class="col-5 col-sm-5 text-muted">Last attempt</dt> <dd class="col-7 col-sm-7 mb-1" data-field="lighthouse.last_attempt">—</dd> <dt class="col-5 col-sm-5 text-muted">Duration</dt> <dd class="col-7 col-sm-7 mb-1" data-field="lighthouse.duration">—</dd> <dt class="col-5 col-sm-5 text-muted">Next run</dt> <dd class="col-7 col-sm-7 mb-0" data-field="lighthouse.next_run">—</dd> </dl> </div> </div> </div> </div></div>{% endif %}
modified
properties/urls.py
@@ -7,6 +7,9 @@ urlpatterns = [ path('<uuid:property_id>/', views.property, name='property'), path('<uuid:property_id>/delete/', views.property_delete, name='property_delete'), path('<uuid:property_id>/is-public/', views.adjust_is_public_property, name='adjust_is_public_property'), path('<uuid:property_id>/status/', views.property_status, name='property_status'), path('<uuid:property_id>/recrawl/', views.property_recrawl, name='property_recrawl'), path('<uuid:property_id>/rerun-lighthouse/', views.property_rerun_lighthouse, name='property_rerun_lighthouse'), path('import/', views.import_properties, name='import_properties'), path('', views.properties, name='properties'),]
modified
properties/views.py
@@ -104,12 +104,6 @@ def property(request, property_id): if not property_obj.is_public and property_obj.user != request.user: return redirect("properties") if property_obj.user == request.user and request.GET.get('recrawl') == '': property_obj.next_run_at_crawler = timezone.now() property_obj.save() messages.success(request, "This property will be recrawled shortly.") return redirect("property", property_id=property_id) # Set some basic page context variables context["title"] = property_obj.name context["description"] = "Status for " + property_obj.name
@@ -158,6 +152,139 @@ def property(request, property_id): return render(request, "properties/property.html", context)def _crawl_progress(property_obj): """Return the fraction (0-1) of the discovered work that's complete.""" from crawler.fetcher import PAGE_CAP pages = property_obj.last_crawl_pages_count or 0 if pages <= 0: return 0.05 # show *some* movement once we start # We don't know the total ahead of time, so use a log-ish ratio capped at # ~90% — the last 10% is reserved for post-crawl check processing. return min(pages / PAGE_CAP, 0.9)def _serialize_status(property_obj): now = timezone.now() crawl_next = property_obj.next_run_at_crawler lh_next = property_obj.next_lighthouse_run_at insights = property_obj.crawler_insights or [] severity_counts = {"error": 0, "warning": 0, "info": 0} for insight in insights: sev = insight.get("severity", "info") if sev in severity_counts: severity_counts[sev] += 1 return { "crawler": { "state": property_obj.crawl_state, "started_at": property_obj.crawl_started_at.isoformat() if property_obj.crawl_started_at else None, "last_attempt_at": property_obj.last_run_at_crawler.isoformat() if property_obj.last_run_at_crawler else None, "last_success_at": property_obj.last_crawl_success_at.isoformat() if property_obj.last_crawl_success_at else None, "last_error": property_obj.last_crawl_error, "last_duration_ms": property_obj.last_crawl_duration_ms, "pages_count": property_obj.last_crawl_pages_count, "next_run_at": crawl_next.isoformat() if crawl_next else None, "is_overdue": bool(crawl_next and crawl_next <= now), "insights_total": len(insights), "insights_by_severity": severity_counts, "progress": _crawl_progress(property_obj) if property_obj.crawl_state == "running" else None, }, "lighthouse": { "state": property_obj.lighthouse_state, "started_at": property_obj.lighthouse_started_at.isoformat() if property_obj.lighthouse_started_at else None, "last_attempt_at": property_obj.last_lighthouse_run_at.isoformat() if 
property_obj.last_lighthouse_run_at else None, "last_success_at": property_obj.last_lighthouse_success_at.isoformat() if property_obj.last_lighthouse_success_at else None, "last_error": property_obj.last_lighthouse_error, "last_duration_ms": property_obj.last_lighthouse_duration_ms, "next_run_at": lh_next.isoformat() if lh_next else None, "is_overdue": bool(lh_next and lh_next <= now), "scores": property_obj.lighthouse_scores, }, "server_time": now.isoformat(), }def property_status(request, property_id): try: property_obj = Property.objects.get(pk=property_id) except Property.DoesNotExist: return JsonResponse({"error": "not_found"}, status=404) if not property_obj.is_public and property_obj.user != request.user: return JsonResponse({"error": "forbidden"}, status=403) return JsonResponse(_serialize_status(property_obj))def property_recrawl(request, property_id): if not request.user.is_authenticated: return JsonResponse({"error": "forbidden"}, status=403) if request.method != "POST": return JsonResponse({"error": "method_not_allowed"}, status=405) try: property_obj = request.user.properties.get(pk=property_id) except Property.DoesNotExist: return JsonResponse({"error": "not_found"}, status=404) if property_obj.crawl_state in ("queued", "running"): return JsonResponse( { "ok": False, "reason": "already_running", **_serialize_status(property_obj), } ) property_obj.next_run_at_crawler = timezone.now() property_obj.save(update_fields=["next_run_at_crawler"]) return JsonResponse({"ok": True, **_serialize_status(property_obj)})def property_rerun_lighthouse(request, property_id): if not request.user.is_authenticated: return JsonResponse({"error": "forbidden"}, status=403) if request.method != "POST": return JsonResponse({"error": "method_not_allowed"}, status=405) try: property_obj = request.user.properties.get(pk=property_id) except Property.DoesNotExist: return JsonResponse({"error": "not_found"}, status=404) if property_obj.lighthouse_state in ("queued", "running"): 
return JsonResponse( { "ok": False, "reason": "already_running", **_serialize_status(property_obj), } ) property_obj.next_lighthouse_run_at = timezone.now() property_obj.save(update_fields=["next_lighthouse_run_at"]) return JsonResponse({"ok": True, **_serialize_status(property_obj)})def import_property(request, url): url = url.lower().strip() if not url.startswith("http"):