modified
properties/management/commands/scheduler.py
@@ -44,10 +44,16 @@ class Command(BaseCommand): q_status.put((property_id, property_type)) def queue_process(self): # Cap on join() so a wedged lighthouse/crawler can't freeze the queue # indefinitely. Must exceed status.lighthouse.SUBPROCESS_TIMEOUT_SECONDS # so a normal slow run still completes inside the window. JOIN_TIMEOUT = 300 while True: if not q.empty(): threads = [] for i in range(2): if q.empty(): break q_data = q.get() if q_data[1] == "lighthouse": t = threading.Thread(target=self.thread_target_lighthouse, args=(q_data[0],))
@@ -57,7 +63,11 @@ class Command(BaseCommand): t.start() threads.append(t) for t in threads: t.join() t.join(timeout=JOIN_TIMEOUT) if t.is_alive(): self.stdout.write( "[Scheduler] Thread still running after {}s, abandoning".format(JOIN_TIMEOUT) ) q.task_done() time.sleep(1)
added
properties/migrations/0008_lighthouse_error_tracking.py
@@ -0,0 +1,21 @@from django.db import migrations, modelsclass Migration(migrations.Migration): dependencies = [ ('properties', '0007_add_alert_state_tracking'), ] operations = [ migrations.AddField( model_name='property', name='last_lighthouse_success_at', field=models.DateTimeField(blank=True, null=True), ), migrations.AddField( model_name='property', name='last_lighthouse_error', field=models.TextField(blank=True, null=True), ), ]
modified
properties/models.py
@@ -14,7 +14,11 @@ from django.utils.functional import cached_propertyfrom django.conf import settingsfrom crawler.runner import run_seo_spiderfrom status.lighthouse import fetch_lighthouse_results, parse_lighthouse_resultsfrom status.lighthouse import ( LighthouseError, fetch_lighthouse_results, parse_lighthouse_results,)logger = logging.getLogger(__name__)
@@ -360,6 +364,8 @@ class Property(CrawlerMixin, AlertsMixin, SecurityMixin, models.Model): lighthouse_scores = models.JSONField(blank=True, null=True) last_lighthouse_run_at = models.DateTimeField(blank=True, null=True) last_lighthouse_success_at = models.DateTimeField(blank=True, null=True) last_lighthouse_error = models.TextField(blank=True, null=True) next_lighthouse_run_at = models.DateTimeField(blank=True, null=True) # Alert state tracking
@@ -453,11 +459,27 @@ class Property(CrawlerMixin, AlertsMixin, SecurityMixin, models.Model): try: results = fetch_lighthouse_results(self.url) scores = parse_lighthouse_results(results) if scores: self.lighthouse_scores = scores self.save(update_fields=["lighthouse_scores"]) except Exception: pass except LighthouseError as e: logger.warning("Lighthouse failed for %s: %s", self.url, e) self.last_lighthouse_error = str(e) self.save(update_fields=["last_lighthouse_error"]) return except Exception as e: logger.exception("Unexpected lighthouse error for %s", self.url) self.last_lighthouse_error = f"{type(e).__name__}: {e}" self.save(update_fields=["last_lighthouse_error"]) return self.lighthouse_scores = scores self.last_lighthouse_success_at = timezone.now() self.last_lighthouse_error = None self.save( update_fields=[ "lighthouse_scores", "last_lighthouse_success_at", "last_lighthouse_error", ] ) @property def total_checks(self):
modified
properties/templates/properties/property.html
@@ -129,6 +129,18 @@</div>{% endif %}{% if property.last_lighthouse_error and user.is_authenticated %}<div class="container-fluid mb-4"> <div class="alert alert-warning mb-0" role="alert"> <strong>Last Lighthouse run failed:</strong> <code>{{ property.last_lighthouse_error }}</code> {% if property.last_lighthouse_run_at %} <span class="text-muted small d-block">attempted {{ property.last_lighthouse_run_at|timesince }} ago</span> {% endif %} </div></div>{% endif %}<div class="container mt-4"> <div class="row"> <div class="col-12 col-md-8">
modified
status/lighthouse.py
@@ -1,27 +1,75 @@"""A basic wrapper around the lighthouse node module.A wrapper around the lighthouse node CLI.Raises LighthouseError with a descriptive message on failure so callers canlog/persist the reason instead of silently dropping the result."""import jsonimport loggingimport shutilimport subprocessfrom django.conf import settingslogger = logging.getLogger(__name__)# Lighthouse's chrome-launcher searches for a browser on its own; we only# pin CHROME_PATH when we can resolve a known name, for determinism in# production (Alpine ships `chromium`). If nothing is found, fall through# and let chrome-launcher do its own lookup.CHROMIUM_BINARY = ( shutil.which("chromium") or shutil.which("chromium-browser") or shutil.which("google-chrome"))CHROME_FLAGS = "--headless --no-sandbox --disable-dev-shm-usage --disable-gpu"# Lighthouse itself can take 60-90s on a slow site; the outer timeout is a# backstop against Chromium hangs that would otherwise wedge the scheduler.SUBPROCESS_TIMEOUT_SECONDS = 180class LighthouseError(Exception): passdef fetch_lighthouse_results(url): command = [ f"{settings.BASE_DIR}/node_modules/.bin/lighthouse", url, '--chrome-flags="--headless --no-sandbox --disable-dev-shm-usage"', '--output="json"', '--output-path="stdout"', '--max-wait-for-load=5000', f"--chrome-flags={CHROME_FLAGS}", "--output=json", "--output-path=stdout", "--quiet", ] env = {"PATH": "/usr/bin:/bin:/usr/local/bin"} if CHROMIUM_BINARY: env["CHROME_PATH"] = CHROMIUM_BINARY try: process = subprocess.run(command, check=True, stdout=subprocess.PIPE) except subprocess.CalledProcessError: return None return json.loads(process.stdout) process = subprocess.run( command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, timeout=SUBPROCESS_TIMEOUT_SECONDS, env=env, ) except subprocess.TimeoutExpired: raise LighthouseError(f"lighthouse timed out after {SUBPROCESS_TIMEOUT_SECONDS}s") except subprocess.CalledProcessError as e: stderr = (e.stderr or b"").decode("utf-8", errors="replace").strip() raise LighthouseError(f"lighthouse exited {e.returncode}: {stderr[-500:]}") except FileNotFoundError as e: raise LighthouseError(f"lighthouse binary missing: {e}") try: return json.loads(process.stdout) except json.JSONDecodeError as e: raise LighthouseError(f"could not parse lighthouse output: {e}")def parse_lighthouse_results(results):
@@ -32,7 +80,11 @@ def parse_lighthouse_results(results): "Best practices": results["categories"]["best-practices"]["score"], "SEO": results["categories"]["seo"]["score"], } except KeyError: return None scores = {k: round(v * 100) for k, v in scores.items()} return scores except KeyError as e: raise LighthouseError(f"missing category in lighthouse output: {e}") if any(v is None for v in scores.values()): missing = [k for k, v in scores.items() if v is None] raise LighthouseError(f"null score(s) returned by lighthouse: {missing}") return {k: round(v * 100) for k, v in scores.items()}