diff --git a/src/cf_map.rs b/src/cf_map.rs new file mode 100644 index 0000000..d1eaadc --- /dev/null +++ b/src/cf_map.rs @@ -0,0 +1,252 @@ +//! Unified, cross-environment Cloudflare map. +//! +//! Where [`crate::cf_snapshot`] scopes to the serving CP's own env, this +//! enumerates EVERY `dd-*` resource in the account and attributes each to +//! its installation (`production`, every `pr-N`, `bot`, `dogfood`, …) by +//! parsing the env out of the name ([`Env::from_resource_name`]). DNS is +//! attributed by the tunnel its CNAME targets. Resources whose env can't +//! be parsed, or whose CNAME points at a tunnel we no longer have, land in +//! an `(unattributed)` bucket — the leaked state a per-env view can't see. +//! +//! It is built from the Cloudflare API directly, so it reflects reality +//! regardless of which control planes are currently up — the basis for +//! treating CF as the source of truth during recovery. Read-only. + +use std::collections::BTreeMap; +use std::sync::Arc; +use std::time::SystemTime; + +use reqwest::Client; +use serde::Serialize; + +use crate::cf; +use crate::cf_snapshot::{infer_policy_kind, CfApp, CfDns, CfTunnel}; +use crate::collector; +use crate::config::CfCreds; +use crate::env::Env; + +const UNATTRIBUTED: &str = "(unattributed)"; + +#[derive(Debug, Clone, Serialize)] +pub struct CfMap { + pub fetched_at: String, + pub cf_account_id: String, + pub cf_zone_id: String, + /// True if any of the three CF list calls failed → partial view. + pub degraded: bool, + pub cf_fetch_errors: Vec, + /// The env this map was served from (its CP enriches its own agents). + pub serving_env: String, + /// Installations sorted by env label, with `(unattributed)` last. + pub installations: Vec, +} + +#[derive(Debug, Clone, Serialize)] +pub struct Installation { + pub env: String, + /// `production` | `staging` | `dev` | `preview` | `named` | + /// `unattributed`. + pub kind: String, + /// A non-soft-deleted `dd-{env}-cp-*` tunnel exists → the control + /// plane for this env is present. `false` = likely leaked (a + /// torn-down env whose resources linger). + pub has_live_cp: bool, + /// The env this CP serves (its in-memory agent store is authoritative + /// for the live-agent count below). + pub is_serving_env: bool, + /// Agents the serving CP currently knows for this env (0 for others — + /// a CP only holds its own env's store). + pub known_agents: usize, + pub tunnels: Vec, + pub dns: Vec, + pub apps: Vec, +} + +#[derive(Default)] +struct Bucket { + tunnels: Vec, + dns: Vec, + apps: Vec, +} + +/// Build the cross-env map from a fresh CF read. Each list call is +/// independently fallible (mirrors [`crate::cf_snapshot`]); a partial read +/// sets `degraded`, and a downstream reconcile must refuse to act on it. +pub async fn build_map( + http: &Client, + cf_creds: &CfCreds, + serving_env: &str, + store: &Arc>>, +) -> CfMap { + let mut fetch_errors = Vec::new(); + let raw_tunnels = match cf::list(http, cf_creds).await { + Ok(v) => v, + Err(e) => { + fetch_errors.push(format!("tunnels: {e}")); + Vec::new() + } + }; + let raw_dns = match cf::list_dns_records(http, cf_creds).await { + Ok(v) => v, + Err(e) => { + fetch_errors.push(format!("dns: {e}")); + Vec::new() + } + }; + let raw_apps = match cf::list_access_apps(http, cf_creds).await { + Ok(v) => v, + Err(e) => { + fetch_errors.push(format!("apps: {e}")); + Vec::new() + } + }; + + let mut buckets: BTreeMap = BTreeMap::new(); + // tunnel id → env label, so DNS (which references a tunnel by id in its + // CNAME content) can be attributed to the same installation. + let mut tunnel_env: std::collections::HashMap = + std::collections::HashMap::new(); + + // Tunnels — attribute by name; unparseable dd-* (and any stray) → unattributed. + for t in &raw_tunnels { + let (Some(id), Some(name)) = ( + t.get("id").and_then(|v| v.as_str()), + t.get("name").and_then(|v| v.as_str()), + ) else { + continue; + }; + // Only consider DD-owned tunnels; ignore unrelated tunnels in the account. + if !name.starts_with("dd-") { + continue; + } + let env = Env::from_resource_name(name) + .map(|e| e.label().to_string()) + .unwrap_or_else(|| UNATTRIBUTED.to_string()); + let cft = CfTunnel { + id: id.to_string(), + name: name.to_string(), + deleted_at: t + .get("deleted_at") + .and_then(|v| v.as_str()) + .map(String::from), + }; + tunnel_env.insert(cft.id.clone(), env.clone()); + buckets.entry(env).or_default().tunnels.push(cft); + } + + // DNS — attribute by the tunnel its CNAME targets. A ref we no longer + // have a tunnel for is a leaked record → unattributed. Non-tunnel + // CNAMEs in the zone aren't ours; skip. + for r in &raw_dns { + let (Some(id), Some(name), Some(content)) = ( + r.get("id").and_then(|v| v.as_str()), + r.get("name").and_then(|v| v.as_str()), + r.get("content").and_then(|v| v.as_str()), + ) else { + continue; + }; + let Some(tunnel_id_ref) = content.strip_suffix(".cfargotunnel.com").map(String::from) + else { + continue; // not a tunnel CNAME → not part of the DD map + }; + let env = tunnel_env + .get(&tunnel_id_ref) + .cloned() + .unwrap_or_else(|| UNATTRIBUTED.to_string()); + let dns = CfDns { + id: id.to_string(), + name: name.to_string(), + content: content.to_string(), + proxied: r.get("proxied").and_then(|v| v.as_bool()).unwrap_or(false), + tunnel_id_ref: Some(tunnel_id_ref), + }; + buckets.entry(env).or_default().dns.push(dns); + } + + // Access apps — usually empty post-#274 (auth moved in-code). Attribute + // DD-owned apps by name; skip unrelated zone apps. + for a in &raw_apps { + let (Some(id), Some(name), Some(domain)) = ( + a.get("id").and_then(|v| v.as_str()), + a.get("name").and_then(|v| v.as_str()), + a.get("domain").and_then(|v| v.as_str()), + ) else { + continue; + }; + if !name.starts_with("dd-") { + continue; + } + let env = Env::from_resource_name(name) + .map(|e| e.label().to_string()) + .unwrap_or_else(|| UNATTRIBUTED.to_string()); + let app = CfApp { + id: id.to_string(), + name: name.to_string(), + domain: domain.to_string(), + policy_kind: infer_policy_kind(a), + }; + buckets.entry(env).or_default().apps.push(app); + } + + // Serving env's live agent count (exclude the CP's own pseudo-entry). + let known_agents = { + let s = store.lock().await; + s.values().filter(|a| !a.tunnel_id.is_empty()).count() + }; + + let mut installations: Vec = buckets + .into_iter() + .map(|(env, b)| { + let unattributed = env == UNATTRIBUTED; + let kind = if unattributed { + "unattributed".to_string() + } else { + Env::parse(&env) + .map(|e| kind_str(&e)) + .unwrap_or("unattributed") + .to_string() + }; + let has_live_cp = b + .tunnels + .iter() + .any(|t| t.name.contains("-cp-") && t.deleted_at.is_none()); + let is_serving_env = !unattributed && env == serving_env; + Installation { + kind, + has_live_cp, + is_serving_env, + known_agents: if is_serving_env { known_agents } else { 0 }, + tunnels: b.tunnels, + dns: b.dns, + apps: b.apps, + env, + } + }) + .collect(); + // `(unattributed)` sorts after real labels because '(' < 'a'; force it last. + installations.sort_by(|a, b| { + let rank = |i: &Installation| (i.env == UNATTRIBUTED, i.env.clone()); + rank(a).cmp(&rank(b)) + }); + + CfMap { + fetched_at: chrono::DateTime::::from(SystemTime::now()).to_rfc3339(), + cf_account_id: cf_creds.account_id.clone(), + cf_zone_id: cf_creds.zone_id.clone(), + degraded: !fetch_errors.is_empty(), + cf_fetch_errors: fetch_errors, + serving_env: serving_env.to_string(), + installations, + } +} + +fn kind_str(e: &Env) -> &'static str { + use crate::env::EnvKind::*; + match e.kind() { + Production => "production", + Staging => "staging", + Dev => "dev", + Preview => "preview", + Named => "named", + } +} diff --git a/src/cf_snapshot.rs b/src/cf_snapshot.rs index 610e34f..75789e7 100644 --- a/src/cf_snapshot.rs +++ b/src/cf_snapshot.rs @@ -310,7 +310,7 @@ async fn build_cf_state( /// Heuristic: bypass apps have a single policy with `decision="bypass"` /// and `include` containing "everyone"; human apps have `decision="allow"` /// with GitHub-org inclusion. Anything else is `unknown`. -fn infer_policy_kind(app: &serde_json::Value) -> String { +pub(crate) fn infer_policy_kind(app: &serde_json::Value) -> String { let policies = app.get("policies").and_then(|p| p.as_array()); let Some(policies) = policies else { return "unknown".into(); diff --git a/src/cp.rs b/src/cp.rs index c90ac56..db354ca 100644 --- a/src/cp.rs +++ b/src/cp.rs @@ -349,6 +349,7 @@ pub async fn run() -> Result<()> { .route("/api/agents", get(api_agents)) .route("/api/fleet", get(fleet_fragment)) .route("/admin/cf/snapshot", get(cf_snapshot_handler)) + .route("/admin/cf/map", get(cf_map_handler)) .route("/api/v1/admin/export", get(export_state)) .route("/admin/enroll", get(enroll_page)) .with_state(state); @@ -1312,6 +1313,25 @@ async fn cf_snapshot_handler( Ok(Json(snap)) } +/// GET /admin/cf/map — unified cross-env view. Enumerates EVERY `dd-*` +/// Cloudflare resource in the account and groups it by installation +/// (production / each pr-N / bot / dogfood / unattributed), so prod and +/// PR previews — which share one CF account — are seen together. Built +/// from CF directly, so it reflects reality regardless of which CPs are +/// up. Same auth as `/admin/cf/snapshot`. Read-only. +async fn cf_map_handler( + State(s): State, + axum::extract::ConnectInfo(peer): axum::extract::ConnectInfo, + headers: axum::http::HeaderMap, +) -> Result> { + if !agents_auth_ok(&s, peer, &headers).await { + return Err(Error::Unauthorized); + } + let http = cf::http_client(); + let map = crate::cf_map::build_map(&http, &s.cfg.cf, s.cfg.common.env.label(), &s.store).await; + Ok(Json(map)) +} + /// Accept the request if the caller is on the loopback interface /// (same-VM trust — any CP-VM workload / dd-agent-proxy) or presents a valid /// bearer that verifies as either a GitHub Actions OIDC token for diff --git a/src/env.rs b/src/env.rs index d6cd271..c2b7bc6 100644 --- a/src/env.rs +++ b/src/env.rs @@ -95,6 +95,22 @@ impl Env { pub fn is_ephemeral(&self) -> bool { matches!(self.kind, EnvKind::Preview) } + + /// Attribute a Cloudflare resource to its installation by parsing the + /// env out of a `dd-{env}-{cp|agent|api}-…` name. The env segment is + /// everything between the `dd-` prefix and the first role marker, so a + /// hyphenated label like `pr-42` is recovered intact + /// (`dd-pr-42-agent-` → `pr-42`). Returns `None` for names that + /// don't follow the convention (so the caller can bucket them as + /// unattributed). Used by the cross-env CF map. + pub fn from_resource_name(name: &str) -> Option { + let rest = name.strip_prefix("dd-")?; + let cut = ["-cp-", "-agent-", "-api-"] + .iter() + .filter_map(|m| rest.find(m)) + .min()?; + Env::parse(&rest[..cut]).ok() + } } impl std::fmt::Display for Env { @@ -153,4 +169,27 @@ mod tests { assert!(!Env::parse("production").unwrap().is_ephemeral()); assert!(!Env::parse("bot").unwrap().is_ephemeral()); } + + #[test] + fn from_resource_name_attributes_env() { + let cases = [ + ("dd-production-cp-1a2b", "production"), + ("dd-production-agent-9f8e", "production"), + ("dd-pr-42-cp-aaaa", "pr-42"), // hyphenated label recovered + ("dd-pr-42-agent-bbbb", "pr-42"), + ("dd-pr-42-api-cccc.devopsdefender.com", "pr-42"), // api hostname + ("dd-bot-agent-dddd", "bot"), + ]; + for (name, want) in cases { + assert_eq!( + Env::from_resource_name(name).unwrap().label(), + want, + "name={name}" + ); + } + // Non-conforming names → None (caller buckets as unattributed). + assert!(Env::from_resource_name("app.devopsdefender.com").is_none()); + assert!(Env::from_resource_name("some-other-tunnel").is_none()); + assert!(Env::from_resource_name("dd-no-marker-here").is_none()); + } } diff --git a/src/lib.rs b/src/lib.rs index 8049194..f8f032e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,6 +1,7 @@ pub mod agent; pub mod auth; pub mod cf; +pub mod cf_map; pub mod cf_snapshot; pub mod collector; pub mod config;