diff --git a/.changeset/bump-bulletin-deploy-0-7-14.md b/.changeset/bump-bulletin-deploy-0-7-14.md new file mode 100644 index 0000000..7652a75 --- /dev/null +++ b/.changeset/bump-bulletin-deploy-0-7-14.md @@ -0,0 +1,5 @@ +--- +"playground-cli": patch +--- + +Bump `bulletin-deploy` to `0.7.14`. Internal hardening of the chunked-storage path against WS-halt allocation storms: per-deploy retry-budget circuit breaker, recovery batch-size drop (2→1 in flight after first reconnect), and a synchronous WS-close hook that destroys the PAPI client before its broadcast-replay loop can OOM. No public-API changes. diff --git a/CLAUDE.md b/CLAUDE.md index fb16888..b4d5c06 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -23,7 +23,7 @@ These are things that aren't self-evident from reading the code and have bitten - **`src/utils/deploy/*` and `src/utils/build/*` must not import React or Ink.** They form the SDK surface that RevX consumes from a WebContainer. TUI code lives in `src/commands/*/`. - **Bun compiled-binary stdin quirk** — Ink's `useInput` silently drops every keystroke (arrows, Enter, Ctrl+C) in `bun build --compile` binaries unless `process.stdin.on('readable', …)` is touched before Ink's `render()`. We install a no-op `readable` listener at the top of `src/index.ts` as a warm-up. Do NOT remove it until Bun's compiled-binary TTY stdin behaves like Node's. Symptom if this breaks: TUI renders but nothing responds, including Ctrl+C. - **`bulletin-deploy` 0.7.4+ pulls in a transitive dep with a broken publish manifest** that pnpm refuses to install. `@parity/dotns-cli` (0.6.0 and 0.6.1 both) publishes a `package.json` declaring `"@polkadot-api/descriptors": "file:.papi/descriptors"` — a workspace-only path that doesn't exist in the published tarball. npm tolerates the dangling `file:` reference (creates a broken symlink and continues); pnpm's strict resolver fails with `ERR_PNPM_LINKED_PKG_DIR_NOT_FOUND`. 
We work around it with a `pnpm.overrides` entry in `package.json` pointing the offending sub-dep at a tiny stub package (`stubs/papi-descriptors-stub/`) so resolution succeeds. The dep is functionally vestigial — dotns-cli's `dist/cli.js` is fully-bundled (Bun build, no externals) and never imports `@polkadot-api/descriptors` at runtime, so the stub exporting `{}` is correct. **Remove the override + stub once `@parity/dotns-cli` republishes a clean manifest.** Tracked upstream against `paritytech/dotns-sdk`. Our direct pin is at the same exact version `bulletin-deploy@0.7.13` declares (`^0.6.1` → `0.6.1`) so both top-level resolution (used by Bun's file-import bundling in `src/dotns-cli-dispatch.ts`) and bulletin-deploy's runtime `_require.resolve("@parity/dotns-cli")` land on the same tarball. -- **`bulletin-deploy` is pinned to an explicit version, not `latest`.** We're on `0.7.13` stable today. The `latest` npm dist-tag is a moving target and previously pointed at 0.6.8, which has a WebSocket heartbeat bug (default 40s < chunk timeout 60s) that tears down uploads mid-flight as `WS halt (3)`. Keep the pin explicit so we never silently slide onto a broken `latest`. When upgrading, read the release notes for any public-API changes to `deploy()`, `DotNS` methods, or the `DeployOptions` we rely on (`jsMerkle`, `signer`, `signerAddress`, `mnemonic`, `rpc`, `attributes`). Note: 0.7.0 removed the `playground?: boolean` `DeployOption` (registry publishing now lives here in `src/utils/deploy/playground.ts`), which is a no-op for us since we never passed that flag. 0.7.1 made the memory-report teardown Bun-safe upstream. 0.7.2 bumped the default `CHUNK_TIMEOUT_MS` 60s → 180s to match Bulletin's new 24s Aura slot duration; `BULLETIN_CHUNK_TIMEOUT_MS` override still works. 0.7.4 extracted the dotns logic into a separate `@parity/dotns-cli` subprocess (forked via `_require.resolve("@parity/dotns-cli")`); see the publish-bug workaround note above. 
0.7.4 also moved label classification off the `DotNS` instance — the previously-instance method `dotns.classifyName(label)` is now the top-level pure function `classifyDotnsLabel(label)`, and the result field renamed `requiredStatus` → `status`. The function isn't re-exported from the package root, so `src/utils/deploy/availability.ts` mirrors the (small, stable) logic locally as `classifyLabel` — same precedent as `simulateUserStatus`. 0.7.6 added ambient Sentry mode for host apps; keep the CLI-owned privacy gate in `src/bootstrap.ts`. 0.7.9 includes the DotNS/deploy fixes needed by the CDM E2E path. 0.7.13 added a `--env ` selector to the `bulletin-deploy` CLI binary (paseo-next default; preview, paseo-review, polkadot, kusama) plus three additive deploy span attributes (`deploy.env`, `deploy.network`, `deploy.environments_source`); zero behaviour change for library consumers like us — we keep using `--rpc` / `BULLETIN_RPC` precedence and the default endpoint resolves to the same paseo-next WSS as before. +- **`bulletin-deploy` is pinned to an explicit version, not `latest`.** We're on `0.7.14` stable today. The `latest` npm dist-tag is a moving target and previously pointed at 0.6.8, which has a WebSocket heartbeat bug (default 40s < chunk timeout 60s) that tears down uploads mid-flight as `WS halt (3)`. Keep the pin explicit so we never silently slide onto a broken `latest`. When upgrading, read the release notes for any public-API changes to `deploy()`, `DotNS` methods, or the `DeployOptions` we rely on (`jsMerkle`, `signer`, `signerAddress`, `mnemonic`, `rpc`, `attributes`). Note: 0.7.0 removed the `playground?: boolean` `DeployOption` (registry publishing now lives here in `src/utils/deploy/playground.ts`), which is a no-op for us since we never passed that flag. 0.7.1 made the memory-report teardown Bun-safe upstream. 
0.7.2 bumped the default `CHUNK_TIMEOUT_MS` 60s → 180s to match Bulletin's new 24s Aura slot duration; `BULLETIN_CHUNK_TIMEOUT_MS` override still works. 0.7.4 extracted the dotns logic into a separate `@parity/dotns-cli` subprocess (forked via `_require.resolve("@parity/dotns-cli")`); see the publish-bug workaround note above. 0.7.4 also moved label classification off the `DotNS` instance — the previously-instance method `dotns.classifyName(label)` is now the top-level pure function `classifyDotnsLabel(label)`, and the result field renamed `requiredStatus` → `status`. The function isn't re-exported from the package root, so `src/utils/deploy/availability.ts` mirrors the (small, stable) logic locally as `classifyLabel` — same precedent as `simulateUserStatus`. 0.7.6 added ambient Sentry mode for host apps; keep the CLI-owned privacy gate in `src/bootstrap.ts`. 0.7.9 includes the DotNS/deploy fixes needed by the CDM E2E path. 0.7.13 added a `--env <name>` selector to the `bulletin-deploy` CLI binary (paseo-next default; preview, paseo-review, polkadot, kusama) plus three additive deploy span attributes (`deploy.env`, `deploy.network`, `deploy.environments_source`); zero behaviour change for library consumers like us — we keep using `--rpc` / `BULLETIN_RPC` precedence and the default endpoint resolves to the same paseo-next WSS as before. 0.7.14 hardens the chunked-storage path against WS-halt allocation storms (issues #142/#216/#287): a per-deploy retry-budget circuit breaker (defaults: 5 events / 30s, tunable via `BULLETIN_RETRY_BUDGET_MAX` and `BULLETIN_RETRY_BUDGET_WINDOW_MS`) bails with a clear `Retry budget exhausted: …` error rather than letting GC fall behind; recovery batch size drops from 2-in-flight to 1-in-flight after the first reconnect; and a synchronous `onStatusChanged(CLOSE|ERROR)` hook destroys the PAPI client before its `activeBroadcasts.forEach` loop can mutate-while-iterating into OOM.
Public surface (`deploy`, `DeployContent`, `DeployOptions`, `DeployResult`) is unchanged; the new exports `setWsHaltCallback` / `retryBudgetExhausted` / `isConnectionError` are internal utilities we don't import. Telemetry now sets `deploy.status="ok"` on the success path (we already get `error`/`killed` from #289). The previously-suspect `new Uint8Array(fs.readFileSync(...))` double-wrap is gone (perf-only). Our metadata-upload bypass via PAPI's `TransactionStorage.store` is unaffected — these changes only live inside `storeChunkedContent` / `deploy()`. - **Throttle TUI info updates** — bulletin-deploy logs per-chunk and builds (vite/next) stream thousands of lines/sec. Calling `setState` on every log event floods React's reconciler with so much backpressure the process can balloon past 20 GB and freeze the OS. `RunningStage` coalesces "latest info" updates to ≤10/sec via a ref + timer and caps line length at 160 chars. Any new hot-path event sink should do the same; don't hook raw per-line streams directly into Ink state. - **Process-guard safety net** (`src/utils/process-guard.ts`) — deploy pipelines open several long-lived WebSockets + child processes and any one of them can keep the event loop alive after the TUI visibly finishes, turning `dot` into a zombie that accumulates retry buffers indefinitely (seen climbing past 25 GB). We defend in depth: (1) `installSignalHandlers()` catches SIGINT/TERM/HUP + `unhandledRejection` and forces cleanup + exit within 3 s. The `unhandledRejection` handler runs each rejection through `isBenignUnsubscriptionError`, which suppresses two known post-destroy artifacts: rxjs `UnsubscriptionError` wrapping `Not connected`, and polkadot-api `DisjointError` from a chainHead unfollow race. Pre-0.2.0 `@parity/product-sdk-terminal::destroy()` also surfaced `@polkadot-api/raw-client::DestroyedError("Client destroyed")` on `dot logout` because it tore down the lazy client without draining pending statement-subscription unsubscribes. 
The 0.2.0 fix made `destroy()` async and `await`s `lazyClient.awaitPendingUnsubs()` between dispose and disconnect, so we no longer suppress that shape — if it ever resurfaces it's a real regression; (2) `scheduleHardExit()` installs an `unref`'d timer that kills the process if the event loop doesn't drain within a grace period; (3) `startMemoryWatchdog()` aborts if RSS exceeds 4 GB — a generous cap because legit deploys on Bun SEA binaries routinely touch 1–1.5 GB from runtime-metadata decoding + Bun's JSC heap + Ink yoga. Do NOT re-add a per-window growth detector: we tried 300 MB / 3 s and it false-positived on the single-burst metadata-loading spike, aborting deploys that would have succeeded. Set `DOT_MEMORY_TRACE=1` to stream per-sample RSS/heap/external stats — useful when diagnosing a real leak report. **Telemetry bootstrap** (`src/bootstrap.ts`) is the FIRST import in `src/index.ts`. It sets `BULLETIN_DEPLOY_USE_AMBIENT_SENTRY=1` and `BULLETIN_DEPLOY_HOST_APP=playground-cli` before `bulletin-deploy` can evaluate, then maps `DOT_TELEMETRY`/internal-context detection to `BULLETIN_DEPLOY_TELEMETRY`. Do not leave `BULLETIN_DEPLOY_TELEMETRY` unset while setting the host app: `bulletin-deploy` treats `playground-cli` as an internal host, which would enable deploy telemetry for external users. `BULLETIN_DEPLOY_MEM_REPORT` is not forced off by default anymore because upstream guards the Bun-incompatible memory-report path. Any new long-running command should register a cleanup hook via `onProcessShutdown()`. - **Parser MUST NOT emit an event per log line.** `DeployLogParser.feed()` is called for every console line bulletin-deploy prints — hundreds per deploy on the happy path, thousands if retries fire. We intentionally emit events ONLY for phase-banner matches and `[N/M]` chunk progress. Everything else returns `null`. 
Adding a catch-all `info` emit turns the parser into a firehose that allocates ~200 bytes × thousands of lines, and was a measurable contributor to chunk-upload memory pressure. diff --git a/package.json b/package.json index 1d4ebf6..4ffa927 100644 --- a/package.json +++ b/package.json @@ -36,7 +36,7 @@ "@parity/product-sdk-utils": "^0.1.1", "@polkadot-api/sdk-ink": "^0.7.0", "@sentry/node": "^9.47.1", - "bulletin-deploy": "0.7.13", + "bulletin-deploy": "0.7.14", "commander": "^12.0.0", "ink": "^5.2.1", "polkadot-api": "^2.1.2", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 9349e73..43f5e21 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -61,8 +61,8 @@ importers: specifier: ^9.47.1 version: 9.47.1 bulletin-deploy: - specifier: 0.7.13 - version: 0.7.13(@polkadot/util@14.0.3)(postcss@8.5.10)(react-native@0.85.2(@babel/core@7.29.0)(@types/react@18.3.28)(react@18.3.1))(rxjs@7.8.2)(typescript@5.9.3)(yaml@2.8.4) + specifier: 0.7.14 + version: 0.7.14(@polkadot/util@14.0.3)(postcss@8.5.10)(react-native@0.85.2(@babel/core@7.29.0)(@types/react@18.3.28)(react@18.3.1))(rxjs@7.8.2)(typescript@5.9.3)(yaml@2.8.4) commander: specifier: ^12.0.0 version: 12.1.0 @@ -2360,8 +2360,8 @@ packages: buffer@6.0.3: resolution: {integrity: sha512-FTiCpNxtwiZZHEZbcbTIcZjERVICn9yq/pDFkTl95/AxzD1naBctN7YO68riM/gLSDY7sdrMby8hofADYuuqOA==} - bulletin-deploy@0.7.13: - resolution: {integrity: sha512-2e0Eu440L1YZ/ZIMkBQWiUi8e4wPxphyZIoYU7Ywf+Rdzd/a386HP49JpL/TMJMWTHlrjg5R4Q5oduJc2I6Wjw==} + bulletin-deploy@0.7.14: + resolution: {integrity: sha512-3UtrwjG+ETHetBDwlg4MLrDKoWs0nJ40h4anfaXBlslzmrupwxSrgiL5qubTe8y0ZX2yE52F9uaqq08cieNLoA==} engines: {node: '>=22'} hasBin: true @@ -7180,7 +7180,7 @@ snapshots: dependencies: '@noble/hashes': 2.2.0 '@polkadot-api/utils': 0.2.0 - '@scure/base': 2.0.0 + '@scure/base': 2.2.0 scale-ts: 1.6.1 '@polkadot-api/substrate-bindings@0.17.0': @@ -8315,7 +8315,7 @@ snapshots: base64-js: 1.5.1 ieee754: 1.2.1 - 
bulletin-deploy@0.7.13(@polkadot/util@14.0.3)(postcss@8.5.10)(react-native@0.85.2(@babel/core@7.29.0)(@types/react@18.3.28)(react@18.3.1))(rxjs@7.8.2)(typescript@5.9.3)(yaml@2.8.4): + bulletin-deploy@0.7.14(@polkadot/util@14.0.3)(postcss@8.5.10)(react-native@0.85.2(@babel/core@7.29.0)(@types/react@18.3.28)(react@18.3.1))(rxjs@7.8.2)(typescript@5.9.3)(yaml@2.8.4): dependencies: '@ipld/car': 5.4.3 '@ipld/dag-pb': 4.1.5