Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion crates/openshell-bootstrap/src/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,10 @@ pub async fn build_and_push_image(
on_log(format!(
"Pushing image {tag} into gateway \"{gateway_name}\""
));
let local_docker = Docker::connect_with_local_defaults()
// Use the long-timeout Docker client so `docker save` of multi-GB images
// doesn't trip the 120s bollard default mid-stream. Override with
// OPENSHELL_DOCKER_TIMEOUT_SECS=<secs>.
let local_docker = crate::docker::connect_local_for_large_transfers()
.into_diagnostic()
.wrap_err("failed to connect to local Docker daemon")?;
let container = container_name(gateway_name);
Expand Down
18 changes: 18 additions & 0 deletions crates/openshell-bootstrap/src/docker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,24 @@ use std::collections::HashMap;

const REGISTRY_NAMESPACE_DEFAULT: &str = "openshell";

/// Default total HTTP timeout, in seconds, for Docker API calls that stream
/// large payloads (e.g. `docker save` used by `sandbox create --from`).
///
/// Bollard's `connect_with_local_defaults()` applies a 120s default timeout,
/// which is far too short for multi-GB image exports — a 7 GB image on a
/// laptop SSD takes ~4–5 minutes. One hour is a safe upper bound; override it
/// at runtime with `OPENSHELL_DOCKER_TIMEOUT_SECS`.
pub(crate) const DEFAULT_LARGE_TRANSFER_TIMEOUT_SECS: u64 = 3600;

/// Build a local-Docker client suitable for large streaming transfers.
///
/// The client timeout is read from `OPENSHELL_DOCKER_TIMEOUT_SECS` (whole
/// seconds; surrounding whitespace is ignored). When the variable is unset,
/// is not a valid `u64`, or is `0`, the timeout falls back to
/// [`DEFAULT_LARGE_TRANSFER_TIMEOUT_SECS`].
///
/// # Errors
///
/// Returns the [`BollardError`] produced by
/// `Docker::connect_with_local_defaults()` if the local client cannot be
/// constructed.
pub fn connect_local_for_large_transfers() -> std::result::Result<Docker, BollardError> {
    let secs = std::env::var("OPENSHELL_DOCKER_TIMEOUT_SECS")
        .ok()
        // Tolerate stray whitespace/newlines (e.g. values sourced from a file)
        // instead of silently discarding an otherwise valid number.
        .and_then(|raw| raw.trim().parse::<u64>().ok())
        // A zero-second timeout can never be met by a real request, so treat
        // it like any other unusable value and use the default.
        .filter(|&secs| secs > 0)
        .unwrap_or(DEFAULT_LARGE_TRANSFER_TIMEOUT_SECS);
    let timeout = std::time::Duration::from_secs(secs);
    Ok(Docker::connect_with_local_defaults()?.with_timeout(timeout))
}

/// Resolve the raw GPU device-ID list, replacing the `"auto"` sentinel with a
/// concrete device ID based on whether CDI is enabled on the daemon.
///
Expand Down
5 changes: 4 additions & 1 deletion crates/openshell-bootstrap/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -521,7 +521,10 @@ where
.collect();
if !images.is_empty() {
log("[status] Deploying components".to_string());
let local_docker = Docker::connect_with_local_defaults().into_diagnostic()?;
// Long-timeout client: `docker save` of multi-GB component
// images streams past bollard's 120s default. See
// docker::connect_local_for_large_transfers().
let local_docker = docker::connect_local_for_large_transfers().into_diagnostic()?;
let container = container_name(&name);
let on_log_ref = Arc::clone(&on_log);
let mut push_log = move |msg: String| {
Expand Down
Loading