Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 27 additions & 28 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions asap-common/sketch-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ serde.workspace = true
rmp-serde = "1.1"
xxhash-rust = { version = "0.8", features = ["xxh32"] }
dsrs = { git = "https://github.com/ProjectASAP/datasketches-rs", rev = "d748ec75c80fff21f7b24897244dd1c895df2e9a" }
sketchlib-rust = { git = "https://github.com/ProjectASAP/sketchlib-rust", rev = "440427438fdaf3ac2298b53ee148f9e12a64ffcc" }
clap = { version = "4.0", features = ["derive"] }

[dev-dependencies]
Expand Down
47 changes: 32 additions & 15 deletions asap-common/sketch-core/report.md
Original file line number Diff line number Diff line change
@@ -1,23 +1,17 @@
# Sketchlib Fidelity Report

Compares the **legacy** sketch implementations in `sketch-core` vs the new **sketchlib-rust** backends for:
Compares the **legacy** Count-Min Sketch implementation in `sketch-core` vs the new **sketchlib-rust** backend.

- `CountMinSketch`
- `CountMinSketchWithHeap` (Count-Min portion)
- `KllSketch`
- `HydraKllSketch` (via `KllSketch`)
## Fidelity harness

## Running Fidelity Tests
The fidelity binary selects backends via CLI flags.

The fidelity binary selects backends via CLI flags instead of environment variables.
| Goal | Command |
|-------------|---------------------------------------------------------------|
| CMS sketchlib | `cargo run -p sketch-core --bin sketchlib_fidelity -- --cms-impl sketchlib` |
| CMS legacy | `cargo run -p sketch-core --bin sketchlib_fidelity -- --cms-impl legacy` |

| Goal | Command |
|--------------------------|--------------------------------------------------------------------------------------------------------------|
| Default (all sketchlib) | `cargo run -p sketch-core --bin sketchlib_fidelity` |
| All legacy | `cargo run -p sketch-core --bin sketchlib_fidelity -- --cms-impl legacy --kll-impl legacy --cmwh-impl legacy` |
| Legacy KLL only | `cargo run -p sketch-core --bin sketchlib_fidelity -- --cms-impl sketchlib --kll-impl legacy --cmwh-impl sketchlib` |

## Unit Tests
## Unit tests

Unit tests always run with **legacy** backends enabled (the test ctor calls
`force_legacy_mode_for_tests()`), so you only need:
Expand All @@ -28,4 +22,27 @@ cargo test -p sketch-core

## Results

Fidelity results will be added as sketch implementations are integrated in subsequent PRs.
### CountMinSketch (accuracy vs exact counts)

#### depth=3

| width | n | domain | Mode | Pearson corr | MAPE (%) | RMSE (%) |
|-------|--------|--------|----------------|----------------|----------|----------|
| 1024 | 100000 | 1000 | Legacy | 0.9998451189 | 24.48 | 52.76 |
| 1024 | 100000 | 1000 | sketchlib-rust | 0.9998387103 | 24.36 | 54.11 |

#### depth=5

| width | n | domain | Mode | Pearson corr | MAPE (%) | RMSE (%) |
|-------|--------|--------|----------------|----------------|----------|----------|
| 2048 | 200000 | 2000 | Legacy | 0.9999733814 | 8.75 | 29.94 |
| 2048 | 200000 | 2000 | sketchlib-rust | 0.9999744627 | 8.37 | 28.84 |
| 2048 | 50000 | 500 | Legacy | 1.0000000000 | 0.00 | 0.00 |
| 2048 | 50000 | 500 | sketchlib-rust | 1.0000000000 | 0.00 | 0.00 |

#### depth=7

| width | n | domain | Mode | Pearson corr | MAPE (%) | RMSE (%) |
|-------|--------|--------|----------------|----------------|----------|----------|
| 4096 | 200000 | 2000 | Legacy | 0.9999993694 | 0.20 | 3.69 |
| 4096 | 200000 | 2000 | sketchlib-rust | 0.9999993499 | 0.21 | 4.27 |
96 changes: 89 additions & 7 deletions asap-common/sketch-core/src/bin/sketchlib_fidelity.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
// Scaffold for fidelity benchmarks; helpers used in later PRs when sketch types are integrated.
// Fidelity benchmarks comparing legacy vs sketchlib implementations across sketch types.
#![allow(dead_code)]

use clap::Parser;
use sketch_core::config::{self, ImplMode};
use sketch_core::count_min::CountMinSketch;

#[derive(Clone)]
struct Lcg64 {
Expand Down Expand Up @@ -97,13 +98,58 @@ fn rank_fraction(sorted: &[f64], x: f64) -> f64 {
(idx as f64) / (sorted.len() as f64)
}

// --- CountMinSketch parameter sets and runner ---

/// One Count-Min Sketch fidelity scenario: sketch dimensions plus workload size.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct CmsParams {
    /// First argument handed to `CountMinSketch::new`.
    depth: usize,
    /// Second argument handed to `CountMinSketch::new`.
    width: usize,
    /// Number of unit-weight updates fed into the sketch.
    n: usize,
    /// Number of distinct key ids (`k0` .. `k{domain-1}`) that are tracked and queried.
    domain: usize,
}

/// Accuracy metrics from one Count-Min Sketch run, comparing estimates with exact counts.
#[derive(Debug, Clone, Copy, PartialEq)]
struct CmsResult {
    /// Value returned by `pearson_corr` for the exact and estimated count vectors.
    pearson: f64,
    /// Value returned by `mape` for the exact and estimated count vectors.
    mape: f64,
    /// Value returned by `rmse_percentage` for the exact and estimated count vectors.
    rmse: f64,
}

/// Runs one Count-Min Sketch accuracy trial and scores its estimates against exact counts.
///
/// Feeds `p.n` unit-weight updates into a fresh `CountMinSketch::new(p.depth, p.width)`.
/// Keys are drawn from a skewed stream: when the low byte of the random draw is below 200
/// the key comes from the first 20 ids (capped at `p.domain`), otherwise from the whole
/// domain. Every key id in `0..p.domain` is then queried and the estimates are compared
/// with the exact per-key counts.
///
/// # Panics
///
/// Panics with a remainder-by-zero error if `p.domain` is zero while `p.n` is non-zero,
/// because no key can be drawn from an empty domain.
fn run_countmin_once(seed: u64, p: &CmsParams) -> CmsResult {
    // Size of the "hot" key range that receives the skewed share of updates.
    const HOT_KEYS: usize = 20;
    // Cap the hot range at the domain size so a hot key id can never index past the end
    // of `exact` when the domain holds fewer than `HOT_KEYS` keys.
    let hot_keys = HOT_KEYS.min(p.domain);

    let mut rng = Lcg64::new(seed);
    let mut exact: Vec<f64> = vec![0.0; p.domain];
    let mut cms = CountMinSketch::new(p.depth, p.width);

    for _ in 0..p.n {
        let r = rng.next_u64();
        let key_id = if (r & 0xFF) < 200 {
            (r as usize) % hot_keys
        } else {
            (r as usize) % p.domain
        };
        let key = format!("k{key_id}");
        cms.update(&key, 1.0);
        exact[key_id] += 1.0;
    }

    // Query every key in the domain, including keys that were never updated.
    let est: Vec<f64> = (0..p.domain)
        .map(|key_id| {
            let key = format!("k{key_id}");
            cms.query_key(&key)
        })
        .collect();

    CmsResult {
        pearson: pearson_corr(&exact, &est),
        mape: mape(&exact, &est),
        rmse: rmse_percentage(&exact, &est),
    }
}

// Command-line options selecting the backend implementation for each sketch type.
// `main` passes the three parsed values to `config::configure`.
// Plain `//` comments are used on purpose: clap's derive turns `///` doc comments into
// help text, which would change the program's `--help` output.
#[derive(Parser)]
struct Args {
// NOTE(review): every field below carries two `#[arg]` lines with different defaults
// (`DEFAULT_IMPL_MODE` and a per-backend constant). This looks like the removed and the
// added diff line shown together — confirm that the real file keeps only one per field.
// Backend used for Count-Min Sketch operations.
#[arg(long, value_enum, default_value_t = sketch_core::config::DEFAULT_IMPL_MODE)]
#[arg(long, value_enum, default_value_t = sketch_core::config::DEFAULT_CMS_IMPL)]
cms_impl: ImplMode,
// Backend used for KLL operations.
#[arg(long, value_enum, default_value_t = sketch_core::config::DEFAULT_IMPL_MODE)]
#[arg(long, value_enum, default_value_t = sketch_core::config::DEFAULT_KLL_IMPL)]
kll_impl: ImplMode,
// Backend used for the Count-Min portion of Count-Min-With-Heap operations.
#[arg(long, value_enum, default_value_t = sketch_core::config::DEFAULT_IMPL_MODE)]
#[arg(long, value_enum, default_value_t = sketch_core::config::DEFAULT_CMWH_IMPL)]
cmwh_impl: ImplMode,
}

Expand All @@ -112,6 +158,7 @@ fn main() {
config::configure(args.cms_impl, args.kll_impl, args.cmwh_impl)
.expect("sketch backend already initialised");

let seed = 0xC0FFEE_u64;
let mode = if matches!(args.cms_impl, ImplMode::Legacy)
|| matches!(args.kll_impl, ImplMode::Legacy)
|| matches!(args.cmwh_impl, ImplMode::Legacy)
Expand All @@ -121,7 +168,42 @@ fn main() {
"sketchlib-rust"
};

println!("# Sketchlib Fidelity Report ({})", mode);
println!();
println!("Fidelity tests will be added as sketch implementations are integrated.");
// CountMinSketch: multiple (depth, width, n, domain)
let cms_param_sets: Vec<CmsParams> = vec![
CmsParams {
depth: 3,
width: 1024,
n: 100_000,
domain: 1000,
},
CmsParams {
depth: 5,
width: 2048,
n: 200_000,
domain: 2000,
},
CmsParams {
depth: 7,
width: 4096,
n: 200_000,
domain: 2000,
},
CmsParams {
depth: 5,
width: 2048,
n: 50_000,
domain: 500,
},
];

println!("## CountMinSketch ({mode})");
println!("| depth | width | n_updates | domain | Pearson corr | MAPE (%) | RMSE (%) |");
println!("|-------|-------|------------|--------|--------------|----------|----------|");
for p in &cms_param_sets {
let r = run_countmin_once(seed, p);
println!(
"| {} | {} | {} | {} | {:.10} | {:.6} | {:.6} |",
p.depth, p.width, p.n, p.domain, r.pearson, r.mape, r.rmse
);
}
}
14 changes: 9 additions & 5 deletions asap-common/sketch-core/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,30 +9,34 @@ pub enum ImplMode {
Sketchlib,
}

/// Default backend when not explicitly configured (e.g. for binaries that don't pass CLI args).
/// Single source of truth for sketch backend defaults.
/// Global default when impl mode is not explicitly configured (e.g. env var parsing).
pub const DEFAULT_IMPL_MODE: ImplMode = ImplMode::Legacy;

/// Per-backend defaults. Used when configure() has not been called.
///
/// Default backend for Count-Min Sketch; the fallback read by
/// `use_sketchlib_for_count_min` when `COUNTMIN_MODE` is still unset.
pub const DEFAULT_CMS_IMPL: ImplMode = ImplMode::Sketchlib;
/// Default backend for KLL; the fallback read by `use_sketchlib_for_kll` when `KLL_MODE`
/// is still unset.
pub const DEFAULT_KLL_IMPL: ImplMode = ImplMode::Legacy;
/// Default backend for Count-Min-With-Heap; the fallback read by
/// `use_sketchlib_for_count_min_with_heap` when `COUNTMIN_WITH_HEAP_MODE` is still unset.
pub const DEFAULT_CMWH_IMPL: ImplMode = ImplMode::Legacy;

static COUNTMIN_MODE: OnceLock<ImplMode> = OnceLock::new();

/// Returns true if Count-Min operations should use sketchlib-rust internally.
///
/// If `COUNTMIN_MODE` is still unset, this call stores the default in it through
/// `get_or_init`, so the mode is fixed from the first query onwards.
pub fn use_sketchlib_for_count_min() -> bool {
*COUNTMIN_MODE.get_or_init(|| DEFAULT_IMPL_MODE) == ImplMode::Sketchlib
*COUNTMIN_MODE.get_or_init(|| DEFAULT_CMS_IMPL) == ImplMode::Sketchlib
}

static KLL_MODE: OnceLock<ImplMode> = OnceLock::new();

/// Returns true if KLL operations should use sketchlib-rust internally.
///
/// If `KLL_MODE` is still unset, this call stores the default in it through
/// `get_or_init`, so the mode is fixed from the first query onwards.
pub fn use_sketchlib_for_kll() -> bool {
*KLL_MODE.get_or_init(|| DEFAULT_IMPL_MODE) == ImplMode::Sketchlib
*KLL_MODE.get_or_init(|| DEFAULT_KLL_IMPL) == ImplMode::Sketchlib
}

static COUNTMIN_WITH_HEAP_MODE: OnceLock<ImplMode> = OnceLock::new();

/// Returns true if Count-Min-With-Heap operations should use sketchlib-rust internally for the
/// Count-Min portion.
///
/// If `COUNTMIN_WITH_HEAP_MODE` is still unset, this call stores the default in it through
/// `get_or_init`, so the mode is fixed from the first query onwards.
pub fn use_sketchlib_for_count_min_with_heap() -> bool {
*COUNTMIN_WITH_HEAP_MODE.get_or_init(|| DEFAULT_IMPL_MODE) == ImplMode::Sketchlib
*COUNTMIN_WITH_HEAP_MODE.get_or_init(|| DEFAULT_CMWH_IMPL) == ImplMode::Sketchlib
}

/// Set backend modes for all sketch types. Call once at process startup,
Expand Down
Loading
Loading