Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
252 changes: 252 additions & 0 deletions src/cf_map.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,252 @@
//! Unified, cross-environment Cloudflare map.
//!
//! Where [`crate::cf_snapshot`] scopes to the serving CP's own env, this
//! enumerates EVERY `dd-*` resource in the account and attributes each to
//! its installation (`production`, every `pr-N`, `bot`, `dogfood`, …) by
//! parsing the env out of the name ([`Env::from_resource_name`]). DNS is
//! attributed by the tunnel its CNAME targets. Resources whose env can't
//! be parsed, or whose CNAME points at a tunnel we no longer have, land in
//! an `(unattributed)` bucket — the leaked state a per-env view can't see.
//!
//! It is built from the Cloudflare API directly, so it reflects reality
//! regardless of which control planes are currently up — the basis for
//! treating CF as the source of truth during recovery. Read-only.

use std::collections::BTreeMap;
use std::sync::Arc;
use std::time::SystemTime;

use reqwest::Client;
use serde::Serialize;

use crate::cf;
use crate::cf_snapshot::{infer_policy_kind, CfApp, CfDns, CfTunnel};
use crate::collector;
use crate::config::CfCreds;
use crate::env::Env;

const UNATTRIBUTED: &str = "(unattributed)";

#[derive(Debug, Clone, Serialize)]
pub struct CfMap {
pub fetched_at: String,
pub cf_account_id: String,
pub cf_zone_id: String,
/// True if any of the three CF list calls failed → partial view.
pub degraded: bool,
pub cf_fetch_errors: Vec<String>,
/// The env this map was served from (its CP enriches its own agents).
pub serving_env: String,
/// Installations sorted by env label, with `(unattributed)` last.
pub installations: Vec<Installation>,
}

#[derive(Debug, Clone, Serialize)]
pub struct Installation {
pub env: String,
/// `production` | `staging` | `dev` | `preview` | `named` |
/// `unattributed`.
pub kind: String,
/// A non-soft-deleted `dd-{env}-cp-*` tunnel exists → the control
/// plane for this env is present. `false` = likely leaked (a
/// torn-down env whose resources linger).
pub has_live_cp: bool,
/// The env this CP serves (its in-memory agent store is authoritative
/// for the live-agent count below).
pub is_serving_env: bool,
/// Agents the serving CP currently knows for this env (0 for others —
/// a CP only holds its own env's store).
pub known_agents: usize,
pub tunnels: Vec<CfTunnel>,
pub dns: Vec<CfDns>,
pub apps: Vec<CfApp>,
}

#[derive(Default)]
struct Bucket {
tunnels: Vec<CfTunnel>,
dns: Vec<CfDns>,
apps: Vec<CfApp>,
}

/// Build the cross-env map from a fresh CF read. Each list call is
/// independently fallible (mirrors [`crate::cf_snapshot`]); a partial read
/// sets `degraded`, and a downstream reconcile must refuse to act on it.
pub async fn build_map(
http: &Client,
cf_creds: &CfCreds,
serving_env: &str,
store: &Arc<tokio::sync::Mutex<std::collections::HashMap<String, collector::Agent>>>,
) -> CfMap {
let mut fetch_errors = Vec::new();
let raw_tunnels = match cf::list(http, cf_creds).await {
Ok(v) => v,
Err(e) => {
fetch_errors.push(format!("tunnels: {e}"));
Vec::new()
}
};
let raw_dns = match cf::list_dns_records(http, cf_creds).await {
Ok(v) => v,
Err(e) => {
fetch_errors.push(format!("dns: {e}"));
Vec::new()
}
};
let raw_apps = match cf::list_access_apps(http, cf_creds).await {
Ok(v) => v,
Err(e) => {
fetch_errors.push(format!("apps: {e}"));
Vec::new()
}
};

let mut buckets: BTreeMap<String, Bucket> = BTreeMap::new();
// tunnel id → env label, so DNS (which references a tunnel by id in its
// CNAME content) can be attributed to the same installation.
let mut tunnel_env: std::collections::HashMap<String, String> =
std::collections::HashMap::new();

// Tunnels — attribute by name; unparseable dd-* (and any stray) → unattributed.
for t in &raw_tunnels {
let (Some(id), Some(name)) = (
t.get("id").and_then(|v| v.as_str()),
t.get("name").and_then(|v| v.as_str()),
) else {
continue;
};
// Only consider DD-owned tunnels; ignore unrelated tunnels in the account.
if !name.starts_with("dd-") {
continue;
}
let env = Env::from_resource_name(name)
.map(|e| e.label().to_string())
.unwrap_or_else(|| UNATTRIBUTED.to_string());
let cft = CfTunnel {
id: id.to_string(),
name: name.to_string(),
deleted_at: t
.get("deleted_at")
.and_then(|v| v.as_str())
.map(String::from),
};
tunnel_env.insert(cft.id.clone(), env.clone());
buckets.entry(env).or_default().tunnels.push(cft);
}

// DNS — attribute by the tunnel its CNAME targets. A ref we no longer
// have a tunnel for is a leaked record → unattributed. Non-tunnel
// CNAMEs in the zone aren't ours; skip.
for r in &raw_dns {
let (Some(id), Some(name), Some(content)) = (
r.get("id").and_then(|v| v.as_str()),
r.get("name").and_then(|v| v.as_str()),
r.get("content").and_then(|v| v.as_str()),
) else {
continue;
};
let Some(tunnel_id_ref) = content.strip_suffix(".cfargotunnel.com").map(String::from)
else {
continue; // not a tunnel CNAME → not part of the DD map
};
let env = tunnel_env
.get(&tunnel_id_ref)
.cloned()
.unwrap_or_else(|| UNATTRIBUTED.to_string());
let dns = CfDns {
id: id.to_string(),
name: name.to_string(),
content: content.to_string(),
proxied: r.get("proxied").and_then(|v| v.as_bool()).unwrap_or(false),
tunnel_id_ref: Some(tunnel_id_ref),
};
buckets.entry(env).or_default().dns.push(dns);
}

// Access apps — usually empty post-#274 (auth moved in-code). Attribute
// DD-owned apps by name; skip unrelated zone apps.
for a in &raw_apps {
let (Some(id), Some(name), Some(domain)) = (
a.get("id").and_then(|v| v.as_str()),
a.get("name").and_then(|v| v.as_str()),
a.get("domain").and_then(|v| v.as_str()),
) else {
continue;
};
if !name.starts_with("dd-") {
continue;
}
let env = Env::from_resource_name(name)
.map(|e| e.label().to_string())
.unwrap_or_else(|| UNATTRIBUTED.to_string());
let app = CfApp {
id: id.to_string(),
name: name.to_string(),
domain: domain.to_string(),
policy_kind: infer_policy_kind(a),
};
buckets.entry(env).or_default().apps.push(app);
}

// Serving env's live agent count (exclude the CP's own pseudo-entry).
let known_agents = {
let s = store.lock().await;
s.values().filter(|a| !a.tunnel_id.is_empty()).count()
};

let mut installations: Vec<Installation> = buckets
.into_iter()
.map(|(env, b)| {
let unattributed = env == UNATTRIBUTED;
let kind = if unattributed {
"unattributed".to_string()
} else {
Env::parse(&env)
.map(|e| kind_str(&e))
.unwrap_or("unattributed")
.to_string()
};
let has_live_cp = b
.tunnels
.iter()
.any(|t| t.name.contains("-cp-") && t.deleted_at.is_none());
let is_serving_env = !unattributed && env == serving_env;
Installation {
kind,
has_live_cp,
is_serving_env,
known_agents: if is_serving_env { known_agents } else { 0 },
tunnels: b.tunnels,
dns: b.dns,
apps: b.apps,
env,
}
})
.collect();
// `(unattributed)` sorts after real labels because '(' < 'a'; force it last.
installations.sort_by(|a, b| {
let rank = |i: &Installation| (i.env == UNATTRIBUTED, i.env.clone());
rank(a).cmp(&rank(b))
});

CfMap {
fetched_at: chrono::DateTime::<chrono::Utc>::from(SystemTime::now()).to_rfc3339(),
cf_account_id: cf_creds.account_id.clone(),
cf_zone_id: cf_creds.zone_id.clone(),
degraded: !fetch_errors.is_empty(),
cf_fetch_errors: fetch_errors,
serving_env: serving_env.to_string(),
installations,
}
}

fn kind_str(e: &Env) -> &'static str {
use crate::env::EnvKind::*;
match e.kind() {
Production => "production",
Staging => "staging",
Dev => "dev",
Preview => "preview",
Named => "named",
}
}
2 changes: 1 addition & 1 deletion src/cf_snapshot.rs
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,7 @@ async fn build_cf_state(
/// Heuristic: bypass apps have a single policy with `decision="bypass"`
/// and `include` containing "everyone"; human apps have `decision="allow"`
/// with GitHub-org inclusion. Anything else is `unknown`.
fn infer_policy_kind(app: &serde_json::Value) -> String {
pub(crate) fn infer_policy_kind(app: &serde_json::Value) -> String {
let policies = app.get("policies").and_then(|p| p.as_array());
let Some(policies) = policies else {
return "unknown".into();
Expand Down
20 changes: 20 additions & 0 deletions src/cp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,7 @@ pub async fn run() -> Result<()> {
.route("/api/agents", get(api_agents))
.route("/api/fleet", get(fleet_fragment))
.route("/admin/cf/snapshot", get(cf_snapshot_handler))
.route("/admin/cf/map", get(cf_map_handler))
.route("/api/v1/admin/export", get(export_state))
.route("/admin/enroll", get(enroll_page))
.with_state(state);
Expand Down Expand Up @@ -1312,6 +1313,25 @@ async fn cf_snapshot_handler(
Ok(Json(snap))
}

/// GET /admin/cf/map — unified cross-env view. Enumerates EVERY `dd-*`
/// Cloudflare resource in the account and groups it by installation
/// (production / each pr-N / bot / dogfood / unattributed), so prod and
/// PR previews — which share one CF account — are seen together. Built
/// from CF directly, so it reflects reality regardless of which CPs are
/// up. Same auth as `/admin/cf/snapshot`. Read-only.
async fn cf_map_handler(
State(s): State<St>,
axum::extract::ConnectInfo(peer): axum::extract::ConnectInfo<std::net::SocketAddr>,
headers: axum::http::HeaderMap,
) -> Result<Json<crate::cf_map::CfMap>> {
if !agents_auth_ok(&s, peer, &headers).await {
return Err(Error::Unauthorized);
}
let http = cf::http_client();
let map = crate::cf_map::build_map(&http, &s.cfg.cf, s.cfg.common.env.label(), &s.store).await;
Ok(Json(map))
}

/// Accept the request if the caller is on the loopback interface
/// (same-VM trust — any CP-VM workload / dd-agent-proxy) or presents a valid
/// bearer that verifies as either a GitHub Actions OIDC token for
Expand Down
39 changes: 39 additions & 0 deletions src/env.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,22 @@ impl Env {
pub fn is_ephemeral(&self) -> bool {
matches!(self.kind, EnvKind::Preview)
}

/// Attribute a Cloudflare resource to its installation by parsing the
/// env out of a `dd-{env}-{cp|agent|api}-…` name. The env segment is
/// everything between the `dd-` prefix and the first role marker, so a
/// hyphenated label like `pr-42` is recovered intact
/// (`dd-pr-42-agent-<uuid>` → `pr-42`). Returns `None` for names that
/// don't follow the convention (so the caller can bucket them as
/// unattributed). Used by the cross-env CF map.
pub fn from_resource_name(name: &str) -> Option<Self> {
let rest = name.strip_prefix("dd-")?;
let cut = ["-cp-", "-agent-", "-api-"]
.iter()
.filter_map(|m| rest.find(m))
.min()?;
Env::parse(&rest[..cut]).ok()
}
}

impl std::fmt::Display for Env {
Expand Down Expand Up @@ -153,4 +169,27 @@ mod tests {
assert!(!Env::parse("production").unwrap().is_ephemeral());
assert!(!Env::parse("bot").unwrap().is_ephemeral());
}

#[test]
fn from_resource_name_attributes_env() {
let cases = [
("dd-production-cp-1a2b", "production"),
("dd-production-agent-9f8e", "production"),
("dd-pr-42-cp-aaaa", "pr-42"), // hyphenated label recovered
("dd-pr-42-agent-bbbb", "pr-42"),
("dd-pr-42-api-cccc.devopsdefender.com", "pr-42"), // api hostname
("dd-bot-agent-dddd", "bot"),
];
for (name, want) in cases {
assert_eq!(
Env::from_resource_name(name).unwrap().label(),
want,
"name={name}"
);
}
// Non-conforming names → None (caller buckets as unattributed).
assert!(Env::from_resource_name("app.devopsdefender.com").is_none());
assert!(Env::from_resource_name("some-other-tunnel").is_none());
assert!(Env::from_resource_name("dd-no-marker-here").is_none());
}
}
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
pub mod agent;
pub mod auth;
pub mod cf;
pub mod cf_map;
pub mod cf_snapshot;
pub mod collector;
pub mod config;
Expand Down
Loading