Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -135,14 +135,15 @@ cmake --build --preset macos-release
📖 **For complete build instructions, troubleshooting, and platform-specific notes, see [README_BUILD.md](README_BUILD.md)**

### Building with Docker
The Dockerfile COPYs folders/files required to build the CPP SDK into the image.
The Docker setup is split into a reusable base image and an SDK image layered on top of it.
**NOTE:** this has only been tested on Linux
```bash
docker build -t livekit-cpp-sdk . -f docker/Dockerfile
docker build -t livekit-cpp-sdk-base . -f docker/Dockerfile.base
docker build --build-arg BASE_IMAGE=livekit-cpp-sdk-base -t livekit-cpp-sdk . -f docker/Dockerfile.sdk
docker run -it --network host livekit-cpp-sdk:latest bash
```

__NOTE:__ if you are building your own Dockerfile, you will likely need to set the same `ENV` variables as in `docker/Dockerfile`, but to the relevant directories:
__NOTE:__ if you are building your own Dockerfile, you will likely need to set the same `ENV` variables as in `docker/Dockerfile.base`, but to the relevant directories:
```bash
export CC=$HOME/gcc-14/bin/gcc
export CXX=$HOME/gcc-14/bin/g++
Expand Down
3 changes: 3 additions & 0 deletions benchmarks/data_track_throughput/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
*env*/
*/__pycache__/
*throughput_results/*
95 changes: 95 additions & 0 deletions benchmarks/data_track_throughput/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
# Copyright 2026 LiveKit, Inc.
#
# Standalone CMake build for the data-track throughput experiment.
# All paths are relative to CMAKE_CURRENT_SOURCE_DIR so this directory
# can be moved or renamed freely.

# 3.20 is the minimum required; the range opts in to NEW policy behavior
# up to 3.28 so newer CMake does not warn about legacy policy defaults.
cmake_minimum_required(VERSION 3.20...3.28)
project(DataTrackThroughput LANGUAGES CXX)

# C++17 is required by the sources; fail rather than fall back to an
# older standard.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# ---- Dependencies --------------------------------------------------------

# The LiveKit C++ SDK must already be installed (see README: CMAKE_PREFIX_PATH).
find_package(LiveKit CONFIG REQUIRED)

# Prefer a system-provided nlohmann_json; fetch a pinned release otherwise.
find_package(nlohmann_json 3.11 QUIET)
if(NOT nlohmann_json_FOUND)
    include(FetchContent)
    FetchContent_Declare(nlohmann_json
        GIT_REPOSITORY https://github.com/nlohmann/json.git
        GIT_TAG        v3.11.3
        GIT_SHALLOW    TRUE
    )
    FetchContent_MakeAvailable(nlohmann_json)
endif()

# ---- Targets -------------------------------------------------------------

set(_targets DataTrackThroughputProducer DataTrackThroughputConsumer)

add_executable(DataTrackThroughputProducer producer.cpp)
add_executable(DataTrackThroughputConsumer consumer.cpp)

# Both binaries share the same include path and link set.
foreach(_exe IN LISTS _targets)
    target_include_directories(${_exe} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}")
    target_link_libraries(${_exe}
        PRIVATE
            LiveKit::livekit
            nlohmann_json::nlohmann_json
    )
endforeach()

# ---- RPATH ---------------------------------------------------------------

# Make the executables find the SDK shared libraries that are copied next
# to them (see the copy step below) without requiring LD_LIBRARY_PATH.
if(UNIX)
    if(APPLE)
        set(_origin_token "@loader_path")
    else()
        set(_origin_token "$ORIGIN")
    endif()
    set_target_properties(${_targets} PROPERTIES
        BUILD_RPATH "${_origin_token}"
        INSTALL_RPATH "${_origin_token}"
    )
    if(NOT APPLE)
        # Keep $ORIGIN literal in the build-tree RPATH instead of an
        # absolute path.
        set_target_properties(${_targets} PROPERTIES
            BUILD_RPATH_USE_ORIGIN TRUE
        )
    endif()
endif()

# ---- Copy SDK shared libraries next to executables -----------------------

# Resolve the on-disk directory of the imported LiveKit shared library.
# NOTE: get_target_property(... LOCATION) is deprecated (policy CMP0026)
# and errors on imported targets under newer CMake, so query the
# IMPORTED_LOCATION properties directly, preferring the config-agnostic
# one and falling back to per-config values.
set(_lk_lib_dir "")
get_target_property(_lk_location LiveKit::livekit IMPORTED_LOCATION)
if(NOT _lk_location)
    get_target_property(_lk_location LiveKit::livekit IMPORTED_LOCATION_RELEASE)
endif()
if(NOT _lk_location)
    get_target_property(_lk_location LiveKit::livekit IMPORTED_LOCATION_DEBUG)
endif()
if(_lk_location)
    get_filename_component(_lk_lib_dir "${_lk_location}" DIRECTORY)
endif()

if(_lk_lib_dir)
    # Collect the SDK's runtime libraries for the host platform.
    if(WIN32)
        # DLLs typically live in ../bin alongside the import libraries.
        file(GLOB _sdk_shared_libs "${_lk_lib_dir}/../bin/*.dll" "${_lk_lib_dir}/*.dll")
    elseif(APPLE)
        file(GLOB _sdk_shared_libs "${_lk_lib_dir}/*.dylib")
    else()
        file(GLOB _sdk_shared_libs "${_lk_lib_dir}/*.so" "${_lk_lib_dir}/*.so.*")
    endif()

    # Copy every shared library next to each executable after it builds so
    # the RPATH set above resolves without installing the SDK system-wide.
    # IN LISTS keeps paths containing spaces/semicolons intact; VERBATIM
    # makes the command-line escaping platform-independent.
    foreach(_target IN LISTS _targets)
        foreach(_lib IN LISTS _sdk_shared_libs)
            get_filename_component(_lib_name "${_lib}" NAME)
            add_custom_command(TARGET ${_target} POST_BUILD
                COMMAND ${CMAKE_COMMAND} -E copy_if_different
                        "${_lib}" "$<TARGET_FILE_DIR:${_target}>/${_lib_name}"
                COMMENT "Copying ${_lib_name} next to ${_target}"
                VERBATIM
            )
        endforeach()
    endforeach()
endif()
273 changes: 273 additions & 0 deletions benchmarks/data_track_throughput/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,273 @@
# Data Track Throughput Experiment

Coordinated producer and consumer for benchmarking `LocalDataTrack` /
`RemoteDataTrack` throughput across a sweep of payload sizes and publish rates.

## What It Does

- `producer.cpp`
- Publishes a data track named `data-track-throughput`
- Runs a default sweep of payload sizes and publish rates (see
**Test Bounds** below)
- Calls the consumer over RPC before and after each scenario

- `consumer.cpp`
- Registers a room data-frame callback for the producer's data track
- Receives every frame and records arrival timestamps
- Logs validation warnings (size mismatches, header mismatches, etc.) to stderr
- Tracks duplicates and missing messages
- Appends raw data to scenario-level and per-message CSV files

## Design Principles

- **Raw data only in CSV.** The consumer writes only directly measured values
(counts, byte totals, microsecond timestamps). All derived metrics (throughput,
latency percentiles, delivery ratio, etc.) are computed at analysis time by
`scripts/plot_throughput.py`.
- **Fixed packet size per scenario.** Each scenario uses a single
`packet_size_bytes`. This ensures every message in a run is the same size,
making aggregate measurements unambiguous.
- **Minimal measurement overhead.** The hot `onDataFrame` callback captures the
arrival timestamp first, then appends to an in-memory vector under a brief
mutex. File I/O happens only at finalization after all data is collected.

## Test Bounds

All bounds are defined in `common.h`. A scenario is any combination of
(payload size, publish rate) that passes all three constraints below.

### Hard Limits

| Parameter | Min | Max |
|-----------|-----|-----|
| Packet size | 1 KiB | 256 MiB |
| Publish rate | 1 Hz | 50k Hz |

### Data-Rate Budget

Every scenario must satisfy:

```
packet_size_bytes * desired_rate_hz <= 10 Gbps (1.25 GB/s)
```

This naturally allows small messages at very high rates and large messages at
low rates while preventing any single scenario from attempting an unreasonable
throughput that would destabilize the connection.

### Default Sweep Grid

The default sweep iterates over 13 payload sizes and 13 publish rates, skipping
any combination that exceeds the data-rate budget:

**Payload sizes:** 1 KiB, 4 KiB, 16 KiB, 64 KiB, 128 KiB, 256 KiB, 512 KiB,
1 MiB, 2 MiB, 4 MiB, 16 MiB, 64 MiB, 256 MiB

**Publish rates:** 1, 5, 10, 25, 50, 100, 200, 500, 1k, 5k, 10k, 20k, 50k Hz

The budget clips larger payloads to lower rates. For example:

| Payload | Max rate allowed |
|---------|-----------------|
| 1 KiB | 50k Hz (all rates) |
| 16 KiB | 50k Hz (all rates) |
| 64 KiB | 10k Hz |
| 256 KiB | 1k Hz |
| 1 MiB | 1k Hz |
| 4 MiB | 200 Hz |
| 64 MiB | 10 Hz |
| 256 MiB | 1 Hz |

Comment on lines +81 to +93
Copy link

Copilot AI Apr 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This section is duplicated verbatim (the "The budget clips larger payloads..." paragraph and the payload/rate table appear twice). Please remove the duplicate copy to avoid confusion and keep the README concise.

Suggested change
The budget clips larger payloads to lower rates. For example:
| Payload | Max rate allowed |
|---------|-----------------|
| 1 KiB | 50k Hz (all rates) |
| 16 KiB | 50k Hz (all rates) |
| 64 KiB | 10k Hz |
| 256 KiB | 1k Hz |
| 1 MiB | 1k Hz |
| 4 MiB | 200 Hz |
| 64 MiB | 10 Hz |
| 256 MiB | 1 Hz |

Copilot uses AI. Check for mistakes.
Single-scenario mode (`--rate-hz`, `--packet-size`, `--num-msgs`) bypasses the
default grid and only enforces the hard limits and data-rate budget, allowing
any valid combination to be tested explicitly.

## CSV Output

The consumer writes raw measurement data only. All derived metrics are computed
at analysis time by `scripts/plot_throughput.py`.

### `throughput_summary.csv`

One row per scenario. Contains only raw counts, byte totals, and microsecond
timestamps:

| Column | Description |
|--------|-------------|
| `run_id` | Unique scenario identifier |
| `scenario_name` | Human-readable scenario label |
| `desired_rate_hz` | Requested publish rate |
| `packet_size_bytes` | Fixed packet size for this scenario |
| `messages_requested` | Number of messages the producer was told to send |
| `messages_attempted` | Number of messages the producer tried to send |
| `messages_enqueued` | Number of messages successfully enqueued |
| `messages_enqueue_failed` | Number of enqueue failures |
| `messages_received` | Unique messages received by consumer |
| `messages_missed` | `messages_requested - messages_received` |
| `duplicate_messages` | Number of duplicate frames received |
| `attempted_bytes` | Total bytes the producer attempted to send |
| `enqueued_bytes` | Total bytes successfully enqueued |
| `received_bytes` | Total bytes received by consumer |
| `first_send_time_us` | Timestamp of first send (microseconds since epoch) |
| `last_send_time_us` | Timestamp of last send |
| `first_arrival_time_us` | Timestamp of first arrival at consumer |
| `last_arrival_time_us` | Timestamp of last arrival at consumer |

### `throughput_messages.csv`

One row per received frame. Raw observation data only:

| Column | Description |
|--------|-------------|
| `run_id` | Scenario identifier |
| `sequence` | Message sequence number |
| `payload_bytes` | Actual payload size received |
| `send_time_us` | Producer send timestamp (microseconds since epoch) |
| `arrival_time_us` | Consumer arrival timestamp (microseconds since epoch) |
| `is_duplicate` | 1 if this sequence was already seen, 0 otherwise |

## Prerequisites

- CMake 3.20+
- C++17 compiler
- The LiveKit C++ SDK, built and installed (see below)

## Building

All commands below assume you are in **this directory**
(`data_track_throughput/`).

### 1. Build and install the SDK

From the SDK repository root:

```bash
./build.sh # builds the SDK (debug by default)
cmake --install build-debug --prefix local-install
```

### 2. Configure this experiment

```bash
cmake -S . -B build \
-DCMAKE_PREFIX_PATH="$(cd ../../local-install && pwd)"
```

> Adjust the `CMAKE_PREFIX_PATH` to wherever the SDK was installed. The path
> above assumes this directory lives two levels below the repository root; it
> works regardless of the parent directory's name.

### 3. Build

```bash
cmake --build build
```

The executables and required shared libraries are placed in `build/`.

## Build Targets

- `DataTrackThroughputConsumer`
- `DataTrackThroughputProducer`

## Running

### Generate Tokens

```bash
# producer
lk token create \
--api-key devkey \
--api-secret secret \
-i producer \
--join \
--valid-for 99999h \
--room robo_room \
--grant '{"canPublish":true,"canSubscribe":true,"canPublishData":true}'

# consumer
lk token create \
--api-key devkey \
--api-secret secret \
-i consumer \
--join \
--valid-for 99999h \
--room robo_room \
--grant '{"canPublish":true,"canSubscribe":true,"canPublishData":true}'
```

Start the local server:
```bash
LIVEKIT_CONFIG="enable_data_tracks: true" livekit-server --dev
```

Start the consumer first:

```bash
./build/DataTrackThroughputConsumer <ws-url> <consumer-token>
```

Then start the producer:

```bash
./build/DataTrackThroughputProducer <ws-url> <producer-token> --consumer consumer
```

If you omit `--consumer`, the producer expects exactly one remote participant
to already be in the room.

## Single Scenario

Instead of the full sweep, you can run one scenario:

```bash
./build/DataTrackThroughputProducer \
<ws-url> <producer-token> \
--consumer <consumer-identity> \
--rate-hz 50 \
--packet-size 1mb \
--num-msgs 25
```

## Plotting

Generate plots from a benchmark output directory:

```bash
python3 scripts/plot_throughput.py data_track_throughput_results
```

By default the script writes PNGs into `data_track_throughput_results/plots/`.
Pass `--output-dir <path>` to override the output location.

All derived metrics (throughput, latency percentiles, delivery ratio, receive
rate, interarrival times) are computed from the raw CSV timestamps and counts
at plot time.

### Generated Plots

From `throughput_summary.csv` + `throughput_messages.csv`:

| File | Description |
|------|-------------|
| `expected_vs_actual_throughput.png` | Scatter plot comparing expected vs actual receive throughput (Mbps). Points are colored by desired publish rate and sized by payload. An ideal y=x reference line is overlaid. |
| `dropped_messages_vs_expected_throughput.png` | Scatter plot of missed/dropped message count vs expected throughput, colored by payload size (log scale). |
| `actual_throughput_heatmap.png` | Heatmap of actual receive throughput (Mbps) with payload size on the y-axis and desired rate on the x-axis. |
| `delivery_ratio_heatmap.png` | Heatmap of delivery ratio (received / requested) over the same payload-size x rate grid. |
| `p50_latency_heatmap.png` | Heatmap of median (P50) send-to-receive latency (ms) over the same grid. |
| `p95_latency_heatmap.png` | Heatmap of P95 send-to-receive latency (ms) over the same grid. |
| `message_latency_histogram.png` | Histogram of per-message latency (ms) across all received frames. |
| `message_interarrival_series.png` | Time-series line plot of inter-arrival gaps (ms) for every received message, ordered by run then arrival time. |
Loading