17.6 KB
raw
// Seeds a "Seed Test" property with realistic-looking fake events.
//
// Usage:
// cargo run --bin seed # 6000 sessions, last 90 days
// cargo run --bin seed -- 12000 180 # 12000 sessions, last 180 days
//
// Sessions are timestamped with a gentle bias toward recent days so the
// dashboard's default 28-day window shows a small positive delta vs. the
// previous 28 days (rather than +1000% from comparing a full window to a
// barely-populated one). At default 6000/90 the visible 28-day window
// holds ~25k events and most metric-card deltas land in the ±20% range.
//
// Re-runs reuse the property and wipe its existing events first so the
// dashboard URL stays stable. The property's `custom_cards` are also
// rewritten on every run so a new seed always shows the demo cards.
#[path = "../db.rs"]
#[allow(dead_code)]
mod db;
use anyhow::Result;
use chrono::Utc;
use rand::prelude::*;
use sqlx::{SqliteConnection, SqlitePool};
use std::path::PathBuf;
use uuid::Uuid;
const PROPERTY_NAME: &str = "Seed Test";
const URLS: &[(&str, &str, u32)] = &[
("/", "Home", 40),
("/about", "About", 10),
("/pricing", "Pricing", 8),
("/docs", "Documentation", 8),
("/blog", "Blog", 5),
("/blog/getting-started", "Getting Started", 5),
("/blog/whats-new-in-v2", "What's New in v2", 4),
("/blog/case-studies", "Case Studies", 3),
("/contact", "Contact", 4),
("/login", "Log In", 4),
("/signup", "Sign Up", 4),
("/dashboard", "Dashboard", 5),
];
const REFERRERS: &[(&str, u32)] = &[
("", 50),
("google.com", 20),
("twitter.com", 5),
("news.ycombinator.com", 3),
("github.com", 3),
("reddit.com", 4),
("duckduckgo.com", 3),
("bing.com", 3),
("linkedin.com", 2),
("producthunt.com", 2),
("dev.to", 2),
("medium.com", 1),
];
struct Agent {
ua: &'static str,
platform: &'static str,
browser: &'static str,
device: &'static str,
is_bot: bool,
bot_name: Option<&'static str>,
weight: u32,
}
const AGENTS: &[Agent] = &[
Agent { ua: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
platform: "Windows", browser: "Chrome", device: "Desktop", is_bot: false, bot_name: None, weight: 25 },
Agent { ua: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
platform: "Mac OS X", browser: "Chrome", device: "Desktop", is_bot: false, bot_name: None, weight: 15 },
Agent { ua: "Mozilla/5.0 (Linux; Android 14; Pixel 8) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Mobile Safari/537.36",
platform: "Android", browser: "Chrome Mobile", device: "Mobile", is_bot: false, bot_name: None, weight: 15 },
Agent { ua: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4 Safari/605.1.15",
platform: "Mac OS X", browser: "Safari", device: "Desktop", is_bot: false, bot_name: None, weight: 10 },
Agent { ua: "Mozilla/5.0 (iPhone; CPU iPhone OS 17_4 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4 Mobile/15E148 Safari/604.1",
platform: "iOS", browser: "Mobile Safari", device: "Mobile", is_bot: false, bot_name: None, weight: 15 },
Agent { ua: "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:128.0) Gecko/20100101 Firefox/128.0",
platform: "Windows", browser: "Firefox", device: "Desktop", is_bot: false, bot_name: None, weight: 5 },
Agent { ua: "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0",
platform: "Ubuntu", browser: "Firefox", device: "Desktop", is_bot: false, bot_name: None, weight: 3 },
Agent { ua: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0",
platform: "Windows", browser: "Edge", device: "Desktop", is_bot: false, bot_name: None, weight: 8 },
Agent { ua: "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)",
platform: "", browser: "", device: "", is_bot: true, bot_name: Some("Googlebot"), weight: 1 },
Agent { ua: "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)",
platform: "", browser: "", device: "", is_bot: true, bot_name: Some("bingbot"), weight: 1 },
Agent { ua: "facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)",
platform: "", browser: "", device: "", is_bot: true, bot_name: Some("facebookexternalhit"), weight: 1 },
];
struct GeoRow {
country: &'static str,
region: &'static str,
city: &'static str,
lat: f64,
lon: f64,
weight: u32,
}
const GEO: &[GeoRow] = &[
GeoRow { country: "US", region: "New York", city: "New York", lat: 40.7128, lon: -74.0060, weight: 15 },
GeoRow { country: "US", region: "California", city: "Los Angeles", lat: 34.0522, lon: -118.2437, weight: 10 },
GeoRow { country: "US", region: "California", city: "San Francisco", lat: 37.7749, lon: -122.4194, weight: 8 },
GeoRow { country: "US", region: "Illinois", city: "Chicago", lat: 41.8781, lon: -87.6298, weight: 5 },
GeoRow { country: "US", region: "Texas", city: "Austin", lat: 30.2672, lon: -97.7431, weight: 5 },
GeoRow { country: "GB", region: "England", city: "London", lat: 51.5074, lon: -0.1278, weight: 8 },
GeoRow { country: "GB", region: "England", city: "Manchester", lat: 53.4808, lon: -2.2426, weight: 2 },
GeoRow { country: "DE", region: "Berlin", city: "Berlin", lat: 52.5200, lon: 13.4050, weight: 5 },
GeoRow { country: "DE", region: "Bavaria", city: "Munich", lat: 48.1351, lon: 11.5820, weight: 3 },
GeoRow { country: "FR", region: "Île-de-France", city: "Paris", lat: 48.8566, lon: 2.3522, weight: 5 },
GeoRow { country: "CA", region: "Ontario", city: "Toronto", lat: 43.6532, lon: -79.3832, weight: 4 },
GeoRow { country: "CA", region: "British Columbia", city: "Vancouver", lat: 49.2827, lon: -123.1207, weight: 2 },
GeoRow { country: "AU", region: "New South Wales", city: "Sydney", lat: -33.8688, lon: 151.2093, weight: 3 },
GeoRow { country: "AU", region: "Victoria", city: "Melbourne", lat: -37.8136, lon: 144.9631, weight: 2 },
GeoRow { country: "JP", region: "Tokyo", city: "Tokyo", lat: 35.6762, lon: 139.6503, weight: 4 },
GeoRow { country: "BR", region: "São Paulo", city: "São Paulo", lat: -23.5505, lon: -46.6333, weight: 3 },
GeoRow { country: "IN", region: "Maharashtra", city: "Mumbai", lat: 19.0760, lon: 72.8777, weight: 3 },
GeoRow { country: "IN", region: "Karnataka", city: "Bangalore", lat: 12.9716, lon: 77.5946, weight: 3 },
GeoRow { country: "NL", region: "North Holland", city: "Amsterdam", lat: 52.3676, lon: 4.9041, weight: 3 },
GeoRow { country: "ES", region: "Madrid", city: "Madrid", lat: 40.4168, lon: -3.7038, weight: 2 },
GeoRow { country: "IT", region: "Lazio", city: "Rome", lat: 41.9028, lon: 12.4964, weight: 2 },
GeoRow { country: "MX", region: "Mexico City", city: "Mexico City", lat: 19.4326, lon: -99.1332, weight: 2 },
GeoRow { country: "KR", region: "Seoul", city: "Seoul", lat: 37.5665, lon: 126.9780, weight: 2 },
GeoRow { country: "SE", region: "Stockholm", city: "Stockholm", lat: 59.3293, lon: 18.0686, weight: 2 },
GeoRow { country: "PL", region: "Mazovia", city: "Warsaw", lat: 52.2297, lon: 21.0122, weight: 2 },
GeoRow { country: "TR", region: "Istanbul", city: "Istanbul", lat: 41.0082, lon: 28.9784, weight: 2 },
GeoRow { country: "ZA", region: "Gauteng", city: "Johannesburg", lat: -26.2041, lon: 28.0473, weight: 1 },
];
const SCREENS_DESKTOP: &[(i64, i64)] = &[
(1920, 1080), (1366, 768), (1440, 900), (1536, 864), (1680, 1050), (2560, 1440),
];
const SCREENS_MOBILE: &[(i64, i64)] = &[
(390, 844), (414, 896), (375, 667), (360, 800), (412, 915), (393, 851),
];
const UTM_SOURCES: &[&str] = &["google", "twitter", "hn", "newsletter", "github", "producthunt"];
const UTM_MEDIUMS: &[&str] = &["cpc", "social", "email", "referral", "organic"];
const UTM_CAMPAIGNS: &[&str] = &["launch-2026", "spring-promo", "blog-feature", "rebrand", "retarget"];
// Demo custom events emitted alongside page-views. Probabilities are
// per-session — at the default 2000 sessions this yields ~100 signups,
// ~40 checkouts, and ~160 CTA clicks, plenty to populate the cards.
const CUSTOM_EVENTS: &[(&str, f64)] = &[
("signup", 0.05),
("checkout_success", 0.02),
("signup_cta_click", 0.08),
];
fn weighted<'a, T>(rng: &mut impl Rng, items: &'a [T], weight: impl Fn(&T) -> u32) -> &'a T {
let total: u32 = items.iter().map(&weight).sum();
let mut pick = rng.gen_range(0..total);
for it in items {
let w = weight(it);
if pick < w {
return it;
}
pick -= w;
}
items.last().unwrap()
}
#[tokio::main]
async fn main() -> Result<()> {
let _ = dotenvy::dotenv();
let args: Vec<String> = std::env::args().collect();
// 6000 sessions over 90 days, biased toward recent. The default 28-day
// dashboard window catches ~34% of sessions (~2000 × ~12 events ≈ 25k).
// Override with `cargo run --bin seed -- <sessions> <days>`.
let sessions: usize = args.get(1).and_then(|s| s.parse().ok()).unwrap_or(6000);
let days: i64 = args.get(2).and_then(|s| s.parse().ok()).unwrap_or(90);
let data_dir = std::env::var("ANALYTICS_DATA_DIR")
.map(PathBuf::from)
.unwrap_or_else(|_| PathBuf::from("./data"));
std::fs::create_dir_all(&data_dir)?;
let pool = db::init(&data_dir).await?;
let property_id = ensure_property(&pool, PROPERTY_NAME).await?;
let pid_bytes = property_id.as_bytes().to_vec();
sqlx::query("DELETE FROM events WHERE property_id = ?")
.bind(&pid_bytes)
.execute(&pool)
.await?;
sqlx::query("DELETE FROM bot_events WHERE property_id = ?")
.bind(&pid_bytes)
.execute(&pool)
.await?;
// Always rewrite custom_cards so re-seeding wipes prior state. The shape
// matches CustomCard in models.rs ([{event, value: bool}]).
let cards_json = serde_json::Value::Array(
CUSTOM_EVENTS
.iter()
.map(|(name, _)| serde_json::json!({ "event": name, "value": true }))
.collect(),
)
.to_string();
sqlx::query("UPDATE properties SET custom_cards = ?, updated_at = ? WHERE id = ?")
.bind(&cards_json)
.bind(Utc::now().timestamp_millis())
.bind(&pid_bytes)
.execute(&pool)
.await?;
let total = generate(&pool, &pid_bytes, sessions, days).await?;
println!("Seeded {} sessions ({} events) into property '{}' ({})", sessions, total, PROPERTY_NAME, property_id);
println!("Dashboard: http://localhost:8000/{}", property_id);
Ok(())
}
async fn ensure_property(pool: &SqlitePool, name: &str) -> Result<Uuid> {
let existing: Option<(Vec<u8>,)> = sqlx::query_as("SELECT id FROM properties WHERE name = ?")
.bind(name)
.fetch_optional(pool)
.await?;
if let Some((bytes,)) = existing {
return Ok(Uuid::from_slice(&bytes)?);
}
let id = Uuid::new_v4();
let now = Utc::now().timestamp_millis();
sqlx::query(
"INSERT INTO properties (id, name, custom_cards, is_protected, is_public, created_at, updated_at) \
VALUES (?, ?, '[]', 0, 0, ?, ?)",
)
.bind(id.as_bytes().to_vec())
.bind(name)
.bind(now)
.bind(now)
.execute(pool)
.await?;
Ok(id)
}
async fn generate(pool: &SqlitePool, pid: &[u8], sessions: usize, days: i64) -> Result<u64> {
let mut rng = thread_rng();
let now = Utc::now().timestamp_millis();
let window_ms: i64 = days * 24 * 60 * 60 * 1000;
let mut total = 0u64;
let mut tx = pool.begin().await?;
for _ in 0..sessions {
let agent = weighted(&mut rng, AGENTS, |a| a.weight);
let geo = weighted(&mut rng, GEO, |g| g.weight);
let referrer_str = weighted(&mut rng, REFERRERS, |r| r.1).0;
let referrer = if referrer_str.is_empty() { None } else { Some(referrer_str) };
let user_id = format!("{}", rng.gen_range(100_000_000u64..999_999_999u64));
// r.powf(1.15) gently biases toward 0, putting more sessions in the
// recent end of the window. Empirically yields ~10–15% growth comparing
// the most-recent 28 days to the previous 28 days, which matches what
// a real site looks like — instead of the +1000% you'd get from a
// uniform 30-day seed where the prev window is mostly empty.
let r: f64 = rng.gen();
let offset_ms = (r.powf(1.15) * window_ms as f64) as i64;
let session_start = now - offset_ms;
let (sw, sh) = if agent.device == "Mobile" {
*SCREENS_MOBILE.choose(&mut rng).unwrap()
} else {
*SCREENS_DESKTOP.choose(&mut rng).unwrap()
};
let (utm_source, utm_medium, utm_campaign) = if rng.gen_bool(0.3) {
(
Some(*UTM_SOURCES.choose(&mut rng).unwrap()),
Some(*UTM_MEDIUMS.choose(&mut rng).unwrap()),
Some(*UTM_CAMPAIGNS.choose(&mut rng).unwrap()),
)
} else {
(None, None, None)
};
if agent.is_bot {
let url_pick = weighted(&mut rng, URLS, |u| u.2);
sqlx::query(
"INSERT INTO bot_events (property_id, event, created_at, bot_name, url, user_agent, country, extra) \
VALUES (?,?,?,?,?,?,?,'{}')",
)
.bind(pid)
.bind("page_view")
.bind(session_start)
.bind(agent.bot_name)
.bind(url_pick.0)
.bind(agent.ua)
.bind(geo.country)
.execute(&mut *tx)
.await?;
total += 1;
continue;
}
let page_count = rng.gen_range(1..=8usize);
let mut t = session_start;
let mut url_pick = weighted(&mut rng, URLS, |u| u.2);
insert_human(&mut tx, pid, "session_start", t, &user_id, url_pick.0, url_pick.1,
referrer, agent, sw, sh, geo, utm_source, utm_medium, utm_campaign, None).await?;
total += 1;
// Emit demo custom events at their per-session probability. Bucketed
// a few seconds after the session start so they fall inside the
// active window and show up on the dashboard's custom-event cards.
for (name, prob) in CUSTOM_EVENTS {
if rng.gen_bool(*prob) {
let offset = rng.gen_range(1_000i64..30_000);
insert_human(&mut tx, pid, name, t + offset, &user_id, url_pick.0, url_pick.1,
None, agent, sw, sh, geo, None, None, None, None).await?;
total += 1;
}
}
for i in 0..page_count {
let time_on_page = rng.gen_range(2_000i64..120_000i64);
let pv_referrer = if i == 0 { referrer } else { None };
insert_human(&mut tx, pid, "page_view", t, &user_id, url_pick.0, url_pick.1,
pv_referrer, agent, sw, sh, geo, utm_source, utm_medium, utm_campaign, None).await?;
total += 1;
if rng.gen_bool(0.4) {
let click_offset = rng.gen_range(500..time_on_page.max(1001));
insert_human(&mut tx, pid, "click", t + click_offset, &user_id, url_pick.0, url_pick.1,
None, agent, sw, sh, geo, None, None, None, None).await?;
total += 1;
}
insert_human(&mut tx, pid, "page_leave", t + time_on_page, &user_id, url_pick.0, url_pick.1,
None, agent, sw, sh, geo, None, None, None, Some(time_on_page)).await?;
total += 1;
t += time_on_page + rng.gen_range(500..3000);
if i + 1 < page_count {
url_pick = weighted(&mut rng, URLS, |u| u.2);
}
}
}
tx.commit().await?;
Ok(total)
}
#[allow(clippy::too_many_arguments)]
async fn insert_human(
tx: &mut sqlx::Transaction<'_, sqlx::Sqlite>,
pid: &[u8],
event: &str,
created_at: i64,
user_id: &str,
url: &str,
title: &str,
referrer: Option<&str>,
agent: &Agent,
screen_w: i64,
screen_h: i64,
geo: &GeoRow,
utm_source: Option<&str>,
utm_medium: Option<&str>,
utm_campaign: Option<&str>,
time_on_page_ms: Option<i64>,
) -> Result<()> {
let conn: &mut SqliteConnection = &mut *tx;
sqlx::query(
"INSERT INTO events (\
property_id, event, created_at, user_id, url, title, referrer, user_agent, \
platform, browser, device, screen_width, screen_height, country, region, city, \
lat, lon, utm_source, utm_medium, utm_campaign, time_on_page_ms, extra\
) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,'{}')",
)
.bind(pid)
.bind(event)
.bind(created_at)
.bind(user_id)
.bind(url)
.bind(title)
.bind(referrer)
.bind(agent.ua)
.bind(agent.platform)
.bind(agent.browser)
.bind(agent.device)
.bind(screen_w)
.bind(screen_h)
.bind(geo.country)
.bind(geo.region)
.bind(geo.city)
.bind(geo.lat)
.bind(geo.lon)
.bind(utm_source)
.bind(utm_medium)
.bind(utm_campaign)
.bind(time_on_page_ms)
.execute(conn)
.await?;
Ok(())
}