From e844e53fe126ccfe4e061ffc682e9d587adf1197 Mon Sep 17 00:00:00 2001
From: Jessica Mulein <jessica@digitaldefiance.org>
Date: Wed, 27 May 2026 10:15:12 -0700
Subject: [PATCH 01/37] feat(sessions): optional AES-256-GCM encryption for
 saved sessions

Add --session-encrypt with CECLI_SESSION_KEY or --session-key-file,
wire encrypt/decrypt through SessionManager save/load/list, and document
usage. Plaintext JSON remains the default when encryption is off.

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 cecli/args.py                        |  18 +++
 cecli/session_crypto.py              | 108 +++++++++++++
 cecli/sessions.py                    | 102 +++++++++++--
 cecli/website/docs/usage/sessions.md |  11 ++
 requirements/requirements.in         |   1 +
 tests/basic/conftest.py              |  24 +++
 tests/basic/test_session_args.py     |  31 ++++
 tests/basic/test_session_crypto.py   | 100 ++++++++++++
 tests/basic/test_sessions_manager.py | 219 +++++++++++++++++++++++++++
 9 files changed, 603 insertions(+), 11 deletions(-)
 create mode 100644 cecli/session_crypto.py
 create mode 100644 tests/basic/conftest.py
 create mode 100644 tests/basic/test_session_args.py
 create mode 100644 tests/basic/test_session_crypto.py
 create mode 100644 tests/basic/test_sessions_manager.py

diff --git a/cecli/args.py b/cecli/args.py
index 387f4764e78..07b947e17ae 100644
--- a/cecli/args.py
+++ b/cecli/args.py
@@ -370,6 +370,24 @@ def get_parser(default_config_files, git_root):
             " (default: False)"
         ),
     )
+    group.add_argument(
+        "--session-encrypt",
+        action=argparse.BooleanOptionalAction,
+        default=False,
+        help=(
+            "Encrypt saved sessions on disk (AES-256-GCM). Requires CECLI_SESSION_KEY or"
+            " --session-key-file (default: False)"
+        ),
+    )
+    group.add_argument(
+        "--session-key-file",
+        metavar="SESSION_KEY_FILE",
+        default=None,
+        help=(
+            "File containing a urlsafe-base64 32-byte session encryption key"
+            " (default: use CECLI_SESSION_KEY only)"
+        ),
+    ).complete = shtab.FILE
     group.add_argument(
         "--mcp-servers",
         metavar="MCP_CONFIG_JSON",
diff --git a/cecli/session_crypto.py b/cecli/session_crypto.py
new file mode 100644
index 00000000000..dbbcab8a9a1
--- /dev/null
+++ b/cecli/session_crypto.py
@@ -0,0 +1,108 @@
+"""Optional AES-256-GCM encryption for on-disk cecli session files."""
+
+from __future__ import annotations
+
+import base64
+import json
+import os
+from pathlib import Path
+from typing import Any
+
+MAGIC = b"CECLI_ENCRYPTED_SESSION_v1\n"
+KEY_ENV = "CECLI_SESSION_KEY"
+KEY_BYTES = 32
+
+
+class SessionCryptoError(Exception):
+    """Session encrypt/decrypt failed."""
+
+
+def is_encrypted_payload(data: bytes) -> bool:
+    return data.startswith(MAGIC)
+
+
+def resolve_key(*, key_file: str | Path | None = None) -> bytes | None:
+    """Return the 32-byte session key, or None when no valid key is configured.
+
+    CECLI_SESSION_KEY (urlsafe base64) wins; ``key_file`` is the fallback. A missing,
+    unreadable, or non-UTF-8 key file yields None instead of raising.
+    """
+    raw = os.environ.get(KEY_ENV, "").strip()
+    key = _decode_key_b64(raw) if raw else None
+    if key is not None or not key_file:
+        return key
+    try:
+        text = Path(key_file).expanduser().read_text(encoding="utf-8").strip()
+    except (OSError, UnicodeDecodeError):
+        return None
+    return _decode_key_b64(text)
+
+
+def _decode_key_b64(text: str) -> bytes | None:
+    """Decode urlsafe-base64 ``text`` (padding optional); None unless it is KEY_BYTES long."""
+    try:
+        padded = text + "=" * (-len(text) % 4)
+        key = base64.urlsafe_b64decode(padded.encode("ascii"))
+    except (ValueError, UnicodeEncodeError):
+        # Malformed base64 or non-ASCII text: not a usable key.
+        return None
+    return key if len(key) == KEY_BYTES else None
+
+
+def encrypt_session_dict(session_data: dict[str, Any], key: bytes) -> bytes:
+    """Serialize ``session_data`` to JSON and seal it with AES-GCM under ``key``."""
+    if len(key) != KEY_BYTES:
+        raise SessionCryptoError(f"Session key must be {KEY_BYTES} bytes.")
+    try:
+        from cryptography.hazmat.primitives.ciphers.aead import AESGCM
+    except ImportError as err:
+        raise SessionCryptoError(
+            "Session encryption requires the cryptography package (pip install cryptography)."
+        ) from err
+
+    plaintext = json.dumps(session_data, ensure_ascii=False).encode("utf-8")
+    nonce = os.urandom(12)  # fresh random nonce for every file written
+    sealed = AESGCM(key).encrypt(nonce, plaintext, None)
+    return MAGIC + base64.urlsafe_b64encode(nonce + sealed) + b"\n"
+
+
+def decrypt_session_bytes(data: bytes, key: bytes) -> dict[str, Any]:
+    """Return the session dict from encrypted or plaintext bytes; SessionCryptoError on failure."""
+    if len(key) != KEY_BYTES:
+        raise SessionCryptoError(f"Session key must be {KEY_BYTES} bytes.")
+    if not is_encrypted_payload(data):
+        try:
+            parsed = json.loads(data.decode("utf-8"))
+        except ValueError as err:  # JSONDecodeError, or UnicodeDecodeError for non-UTF-8 bytes
+            raise SessionCryptoError("Invalid session file (not JSON).") from err
+        if not isinstance(parsed, dict):
+            raise SessionCryptoError("Invalid session format.")
+        return parsed
+
+    body = data[len(MAGIC) :].strip()
+    if not body:
+        raise SessionCryptoError("Encrypted session file is empty.")
+    try:
+        from cryptography.hazmat.primitives.ciphers.aead import AESGCM
+    except ImportError as err:
+        raise SessionCryptoError(
+            "Session encryption requires the cryptography package (pip install cryptography)."
+        ) from err
+
+    try:
+        blob = base64.urlsafe_b64decode(body + b"=" * (-len(body) % 4))
+    except ValueError as err:
+        raise SessionCryptoError("Encrypted session payload is invalid.") from err
+    if len(blob) < 13:
+        raise SessionCryptoError("Encrypted session payload is too short.")
+    try:
+        plaintext = AESGCM(key).decrypt(blob[:12], blob[12:], None)  # 12-byte nonce, then data
+    except Exception as err:
+        raise SessionCryptoError("Could not decrypt session (wrong key or corrupted file).") from err
+    try:
+        parsed = json.loads(plaintext.decode("utf-8"))
+    except ValueError as err:  # JSONDecodeError, or UnicodeDecodeError for non-UTF-8 bytes
+        raise SessionCryptoError("Decrypted session is not valid JSON.") from err
+    if not isinstance(parsed, dict):
+        raise SessionCryptoError("Invalid session format.")
+    return parsed
diff --git a/cecli/sessions.py b/cecli/sessions.py
index f1ee5a12570..d097b48fa4e 100644
--- a/cecli/sessions.py
+++ b/cecli/sessions.py
@@ -6,6 +6,7 @@
 from typing import Dict, List, Optional
 
 from cecli import models
+from cecli import session_crypto
 from cecli.helpers.conversation import ConversationService, MessageTag
 
 
@@ -22,6 +23,67 @@ def _get_session_directory(self) -> Path:
         os.makedirs(session_dir, exist_ok=True)
         return session_dir
 
+    def _session_encrypt_settings(self) -> tuple[bool, bytes | None]:
+        """Return (encryption enabled, resolved key or None) from the coder's args."""
+        args = getattr(self.coder, "args", None)
+        if not (args and getattr(args, "session_encrypt", False)):
+            return False, None
+        return True, session_crypto.resolve_key(key_file=getattr(args, "session_key_file", None))
+
+    def _read_session_file(self, session_file: Path) -> dict | None:
+        """Read a session file (encrypted or plaintext); report errors and return None."""
+        try:
+            data = session_file.read_bytes()
+        except OSError as e:
+            self.io.tool_error(f"Error reading session: {e}")
+            return None
+        try:
+            if session_crypto.is_encrypted_payload(data):
+                key_file = getattr(getattr(self.coder, "args", None), "session_key_file", None)
+                key = session_crypto.resolve_key(key_file=key_file)
+                if not key:
+                    self.io.tool_error(
+                        "Session is encrypted but no key is configured "
+                        f"({session_crypto.KEY_ENV} or --session-key-file)."
+                    )
+                    return None
+                return session_crypto.decrypt_session_bytes(data, key)
+            parsed = json.loads(data.decode("utf-8"))
+            if not isinstance(parsed, dict):
+                self.io.tool_error("Invalid session format.")
+                return None
+            return parsed
+        except session_crypto.SessionCryptoError as e:
+            self.io.tool_error(str(e))
+            return None
+        except ValueError as e:  # JSONDecodeError or UnicodeDecodeError (non-UTF-8 file)
+            self.io.tool_error(f"Error loading session: {e}")
+            return None
+
+    def _write_session_file(self, session_file: Path, session_data: dict) -> bool:
+        """Persist ``session_data`` (encrypted when enabled); report errors, return success."""
+        encrypt_enabled, key = self._session_encrypt_settings()
+        if encrypt_enabled and not key:
+            self.io.tool_error(
+                "Session encryption is enabled but no key is configured "
+                f"({session_crypto.KEY_ENV} or --session-key-file)."
+            )
+            return False
+        try:
+            if encrypt_enabled:
+                blob = session_crypto.encrypt_session_dict(session_data, key)
+                session_file.write_bytes(blob)
+            else:
+                with open(session_file, "w", encoding="utf-8") as fh:
+                    json.dump(session_data, fh, indent=2)
+        except session_crypto.SessionCryptoError as e:
+            self.io.tool_error(str(e))
+            return False
+        except OSError as e:
+            self.io.tool_error(f"Error saving session: {e}")
+            return False
+        return True
+
     def save_session(self, session_name: str, output=True) -> bool:
         """Save the current chat session to a named file."""
         if not session_name:
@@ -39,11 +101,12 @@ def save_session(self, session_name: str, output=True) -> bool:
 
         try:
             session_data = self._build_session_data(session_name)
-            with open(session_file, "w", encoding="utf-8") as f:
-                json.dump(session_data, f, indent=2)
+            if not self._write_session_file(session_file, session_data):
+                return False
 
             if output:
-                self.io.tool_output(f"Session saved: {session_file}")
+                suffix = " (encrypted)" if self._session_encrypt_settings()[0] else ""
+                self.io.tool_output(f"Session saved: {session_file}{suffix}")
 
             return True
 
@@ -63,8 +126,27 @@ def list_sessions(self) -> List[Dict]:
         sessions = []
         for session_file in sorted(session_files, key=lambda x: x.stat().st_mtime, reverse=True):
             try:
-                with open(session_file, "r", encoding="utf-8") as f:
-                    session_data = json.load(f)
+                raw = session_file.read_bytes()
+                if session_crypto.is_encrypted_payload(raw):
+                    _, key = self._session_encrypt_settings()
+                    if not key:
+                        sessions.append(
+                            {
+                                "name": session_file.stem,
+                                "file": session_file,
+                                "model": "encrypted",
+                                "edit_format": "—",
+                                "num_messages": 0,
+                                "num_files": 0,
+                                "encrypted": True,
+                            }
+                        )
+                        continue
+                    session_data = session_crypto.decrypt_session_bytes(raw, key)
+                else:
+                    session_data = json.loads(raw.decode("utf-8"))
+                    if not isinstance(session_data, dict):
+                        raise ValueError("not a session object")
 
                 session_info = {
                     "name": session_file.stem,
@@ -80,6 +162,7 @@ def list_sessions(self) -> List[Dict]:
                         + len(session_data.get("files", {}).get("read_only", []))
                         + len(session_data.get("files", {}).get("read_only_stubs", []))
                     ),
+                    "encrypted": session_crypto.is_encrypted_payload(raw),
                 }
                 sessions.append(session_info)
 
@@ -99,15 +182,12 @@ async def load_session(self, session_identifier: str, switch=True) -> bool:
         if not session_file:
             return False
 
-        try:
-            with open(session_file, "r", encoding="utf-8") as f:
-                session_data = json.load(f)
-        except Exception as e:
-            self.io.tool_error(f"Error loading session: {e}")
+        session_data = self._read_session_file(session_file)
+        if session_data is None:
             return False
 
         # Verify session format
-        if not isinstance(session_data, dict) or "version" not in session_data:
+        if "version" not in session_data:
             self.io.tool_error("Invalid session format.")
             return False
 
diff --git a/cecli/website/docs/usage/sessions.md b/cecli/website/docs/usage/sessions.md
index ada211cea95..ff7dc663b9b 100644
--- a/cecli/website/docs/usage/sessions.md
+++ b/cecli/website/docs/usage/sessions.md
@@ -158,6 +158,17 @@ Sessions are stored as JSON files in the `.cecli/sessions/` directory within you
 ### Version Control
 - Consider adding `.cecli/sessions/` to your `.gitignore` if sessions contain sensitive information
 
+### Optional encryption (AES-256-GCM)
+
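+When enabled, session files are encrypted on disk; when disabled, sessions are saved as plaintext JSON exactly as before. Existing plaintext sessions still load with encryption enabled, and encrypted sessions load whenever a valid key is available.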
+
+```bash
+export CECLI_SESSION_KEY="$(python -c 'import os,base64; print(base64.urlsafe_b64encode(os.urandom(32)).decode())')"
+cecli --session-encrypt --auto-save
+```
+
+Or use `--session-key-file` pointing at a file with the same urlsafe-base64 32-byte key. BrightVision stores the key in the OS keychain and sets `CECLI_SESSION_KEY` for the Vision API process.
+
 ## Troubleshooting
 
 ### Session Not Found
diff --git a/requirements/requirements.in b/requirements/requirements.in
index 895678008c9..26d155cda8c 100644
--- a/requirements/requirements.in
+++ b/requirements/requirements.in
@@ -32,6 +32,7 @@ tomlkit>=0.14.0
 truststore
 xxhash>=3.6.0
 py-cymbal>=0.1.24
+cryptography>=42.0.0
 
 # Replaced networkx with rustworkx for better performance in repomap
 rustworkx>=0.15.0
diff --git a/tests/basic/conftest.py b/tests/basic/conftest.py
new file mode 100644
index 00000000000..b5126377867
--- /dev/null
+++ b/tests/basic/conftest.py
@@ -0,0 +1,24 @@
+"""Shared fixtures for cecli basic tests."""
+
+import base64
+import os
+
+import pytest
+
+from cecli import session_crypto
+
+
+@pytest.fixture
+def session_key32():
+    return os.urandom(session_crypto.KEY_BYTES)
+
+
+@pytest.fixture
+def session_key_b64(session_key32):
+    return base64.urlsafe_b64encode(session_key32).decode().rstrip("=")
+
+
+@pytest.fixture
+def session_key_env(monkeypatch, session_key32, session_key_b64):
+    monkeypatch.setenv(session_crypto.KEY_ENV, session_key_b64)
+    return session_key32
diff --git a/tests/basic/test_session_args.py b/tests/basic/test_session_args.py
new file mode 100644
index 00000000000..7576ee5c439
--- /dev/null
+++ b/tests/basic/test_session_args.py
@@ -0,0 +1,31 @@
+"""CLI args for session encryption and auto-save."""
+
+from cecli.args import get_parser
+
+
+def test_session_encrypt_defaults_off():
+    parser = get_parser([], "/tmp/project")
+    args = parser.parse_args([])
+    assert args.session_encrypt is False
+    assert args.session_key_file is None
+    assert args.auto_save is False
+    assert args.auto_load is False
+    assert args.auto_save_session_name == "auto-save"
+
+
+def test_session_encrypt_flag():
+    parser = get_parser([], "/tmp/project")
+    args = parser.parse_args(["--session-encrypt"])
+    assert args.session_encrypt is True
+
+
+def test_session_encrypt_no_flag():
+    parser = get_parser([], "/tmp/project")
+    args = parser.parse_args(["--no-session-encrypt"])
+    assert args.session_encrypt is False
+
+
+def test_session_key_file_flag():
+    parser = get_parser([], "/tmp/project")
+    args = parser.parse_args(["--session-key-file", "/tmp/key.bin"])
+    assert args.session_key_file == "/tmp/key.bin"
diff --git a/tests/basic/test_session_crypto.py b/tests/basic/test_session_crypto.py
new file mode 100644
index 00000000000..949e617adfb
--- /dev/null
+++ b/tests/basic/test_session_crypto.py
@@ -0,0 +1,100 @@
+"""Unit tests for cecli.session_crypto."""
+
+import base64
+import json
+import os
+
+import pytest
+
+from cecli import session_crypto
+
+
+def test_roundtrip_encrypted(session_key32):
+    data = {"version": 1, "session_name": "t", "model": "gpt-4"}
+    blob = session_crypto.encrypt_session_dict(data, session_key32)
+    assert session_crypto.is_encrypted_payload(blob)
+    assert session_crypto.decrypt_session_bytes(blob, session_key32) == data
+
+
+def test_plaintext_json_still_loads(session_key32):
+    raw = json.dumps({"version": 1}).encode("utf-8")
+    assert not session_crypto.is_encrypted_payload(raw)
+    out = session_crypto.decrypt_session_bytes(raw, session_key32)
+    assert out["version"] == 1
+
+
+def test_wrong_key_fails(session_key32):
+    blob = session_crypto.encrypt_session_dict({"version": 1}, session_key32)
+    with pytest.raises(session_crypto.SessionCryptoError):
+        session_crypto.decrypt_session_bytes(blob, os.urandom(32))
+
+
+def test_invalid_key_length_rejected():
+    with pytest.raises(session_crypto.SessionCryptoError):
+        session_crypto.encrypt_session_dict({"version": 1}, b"short")
+
+
+def test_resolve_key_from_env(session_key_env, session_key32):
+    assert session_crypto.resolve_key() == session_key32
+
+
+def test_resolve_key_from_file(tmp_path, monkeypatch, session_key32, session_key_b64):
+    monkeypatch.delenv(session_crypto.KEY_ENV, raising=False)  # an ambient env key would win
+    (tmp_path / "key.txt").write_text(session_key_b64, encoding="utf-8")
+    assert session_crypto.resolve_key(key_file=tmp_path / "key.txt") == session_key32
+
+
+def test_resolve_key_missing_returns_none(monkeypatch):
+    monkeypatch.delenv(session_crypto.KEY_ENV, raising=False)
+    assert session_crypto.resolve_key() is None
+
+
+def test_resolve_key_rejects_bad_env(monkeypatch):
+    monkeypatch.setenv(session_crypto.KEY_ENV, "not-valid-key-material")
+    assert session_crypto.resolve_key() is None
+
+
+def test_magic_prefix_constant():
+    assert session_crypto.MAGIC.startswith(b"CECLI_ENCRYPTED_SESSION")
+
+
+def test_corrupt_ciphertext_raises(session_key32):
+    blob = session_crypto.MAGIC + b"not-valid-base64!!!\n"
+    with pytest.raises(session_crypto.SessionCryptoError):
+        session_crypto.decrypt_session_bytes(blob, session_key32)
+
+
+def test_empty_encrypted_body_raises(session_key32):
+    blob = session_crypto.MAGIC + b"\n"
+    with pytest.raises(session_crypto.SessionCryptoError):
+        session_crypto.decrypt_session_bytes(blob, session_key32)
+
+
+def test_encrypted_file_roundtrip_on_disk(tmp_path, session_key32):
+    path = tmp_path / "sess.json"
+    payload = {"version": 1, "session_name": "disk", "chat_history": {"done_messages": [], "cur_messages": []}}
+    path.write_bytes(session_crypto.encrypt_session_dict(payload, session_key32))
+    raw = path.read_bytes()
+    assert session_crypto.is_encrypted_payload(raw)
+    assert session_crypto.decrypt_session_bytes(raw, session_key32) == payload
+
+
+def test_unicode_roundtrip(session_key32):
+    payload = {"version": 1, "session_name": "t", "todo_list": "— fix café naïve"}
+    blob = session_crypto.encrypt_session_dict(payload, session_key32)
+    assert session_crypto.decrypt_session_bytes(blob, session_key32) == payload
+
+
+def test_cryptography_import_error(monkeypatch):
+    import builtins
+
+    real_import = builtins.__import__
+
+    def fake_import(name, *args, **kwargs):
+        if name == "cryptography.hazmat.primitives.ciphers.aead":
+            raise ImportError("blocked for test")
+        return real_import(name, *args, **kwargs)
+
+    monkeypatch.setattr(builtins, "__import__", fake_import)
+    with pytest.raises(session_crypto.SessionCryptoError, match="cryptography"):
+        session_crypto.encrypt_session_dict({"version": 1}, os.urandom(32))
diff --git a/tests/basic/test_sessions_manager.py b/tests/basic/test_sessions_manager.py
new file mode 100644
index 00000000000..d56ae834e08
--- /dev/null
+++ b/tests/basic/test_sessions_manager.py
@@ -0,0 +1,219 @@
+"""SessionManager on-disk persistence and optional encryption."""
+
+from __future__ import annotations
+
+import json
+import os
+from pathlib import Path
+from types import SimpleNamespace
+from unittest.mock import MagicMock
+
+import pytest
+
+from cecli import session_crypto
+from cecli.io import InputOutput
+from cecli.sessions import SessionManager
+
+
+def _prepare_workspace(coder, tmp_path) -> Path:
+    root = Path(tmp_path)
+    coder.abs_root_path.side_effect = lambda x: str(root / x)
+    (root / ".cecli" / "sessions").mkdir(parents=True, exist_ok=True)
+    (root / "file1.py").write_text("", encoding="utf-8")
+    return root
+
+
+@pytest.fixture
+def mock_coder(monkeypatch):
+    main_model = MagicMock()
+    main_model.name = "test_model"
+    main_model.weak_model.name = "weak"
+    main_model.editor_model.name = "editor"
+    main_model.agent_model.name = "agent"
+    main_model.editor_edit_format = "editor-diff"
+    main_model.retries = 0
+    main_model.debug = False
+
+    conv_manager = MagicMock()
+    conv_manager.get_messages_dict.return_value = []
+    files_manager = MagicMock()
+    monkeypatch.setattr(
+        "cecli.sessions.ConversationService.get_manager",
+        lambda _coder: conv_manager,
+    )
+    monkeypatch.setattr(
+        "cecli.sessions.ConversationService.get_files",
+        lambda _coder: files_manager,
+    )
+    monkeypatch.setattr(
+        "cecli.sessions.models.Model",
+        lambda *args, **kwargs: main_model,
+    )
+
+    coder = MagicMock()
+    coder.abs_fnames = set()
+    coder.abs_read_only_fnames = set()
+    coder.abs_read_only_stubs_fnames = set()
+    coder.auto_commits = True
+    coder.auto_lint = True
+    coder.auto_test = False
+    coder.total_tokens_sent = 0
+    coder.total_tokens_received = 0
+    coder.total_cached_tokens = 0
+    coder.total_cost = 0.0
+    coder.edit_format = "diff"
+    coder.format_chat_chunks = MagicMock()
+    coder.get_rel_fname.side_effect = lambda x: os.path.basename(x)
+    coder.local_agent_folder.side_effect = lambda x: f".cecli/{x}"
+    coder.io = MagicMock(spec=InputOutput)
+    coder.agent_config = {}
+    coder.mcp_manager = None
+    coder.skills_manager = None
+    coder.main_model = main_model
+    coder.args = SimpleNamespace(
+        model="test_model",
+        weak_model="weak",
+        editor_model="editor",
+        agent_model="agent",
+        editor_edit_format="editor-diff",
+        verbose=False,
+        session_encrypt=False,
+        session_key_file=None,
+    )
+    return coder
+
+
+@pytest.fixture
+def session_manager(mock_coder):
+    return SessionManager(mock_coder, mock_coder.io)
+
+
+@pytest.fixture
+def encrypt_coder(mock_coder, session_key_env):
+    mock_coder.args = SimpleNamespace(
+        model="test_model",
+        weak_model="weak",
+        editor_model="editor",
+        agent_model="agent",
+        editor_edit_format="editor-diff",
+        verbose=False,
+        session_encrypt=True,
+        session_key_file=None,
+    )
+    return mock_coder
+
+
+def test_save_plaintext_json(session_manager, mock_coder, tmp_path):
+    root = _prepare_workspace(mock_coder, tmp_path)
+    assert session_manager.save_session("plain", output=False)
+    path = root / ".cecli" / "sessions" / "plain.json"
+    raw = path.read_bytes()
+    assert raw.startswith(b"{")
+    data = json.loads(raw.decode("utf-8"))
+    assert data["session_name"] == "plain"
+    assert data["version"] == 1
+
+
+def test_save_encrypted_blob(encrypt_coder, session_key32, tmp_path):
+    manager = SessionManager(encrypt_coder, encrypt_coder.io)
+    root = _prepare_workspace(encrypt_coder, tmp_path)
+    assert manager.save_session("secret", output=False)
+    path = root / ".cecli" / "sessions" / "secret.json"
+    raw = path.read_bytes()
+    assert session_crypto.is_encrypted_payload(raw)
+    assert session_crypto.decrypt_session_bytes(raw, session_key32)["session_name"] == "secret"
+
+
+def test_save_encrypt_without_key_fails(mock_coder, monkeypatch, tmp_path):
+    monkeypatch.delenv(session_crypto.KEY_ENV, raising=False)
+    _prepare_workspace(mock_coder, tmp_path)
+    mock_coder.args = SimpleNamespace(
+        model="test_model",
+        weak_model="weak",
+        editor_model="editor",
+        agent_model="agent",
+        editor_edit_format="editor-diff",
+        verbose=False,
+        session_encrypt=True,
+        session_key_file=None,
+    )
+    assert SessionManager(mock_coder, mock_coder.io).save_session("nope", output=False) is False
+
+
+def test_list_encrypted_with_key(encrypt_coder, tmp_path):
+    manager = SessionManager(encrypt_coder, encrypt_coder.io)
+    _prepare_workspace(encrypt_coder, tmp_path)
+    manager.save_session("listed", output=False)
+    rows = manager.list_sessions()
+    assert len(rows) == 1
+    assert rows[0]["name"] == "listed"
+    assert rows[0].get("encrypted") is True
+    assert rows[0]["model"] == "test_model"
+
+
+def test_list_encrypted_placeholder_without_key(encrypt_coder, monkeypatch, tmp_path):
+    manager = SessionManager(encrypt_coder, encrypt_coder.io)
+    _prepare_workspace(encrypt_coder, tmp_path)
+    manager.save_session("locked", output=False)
+    monkeypatch.delenv(session_crypto.KEY_ENV, raising=False)
+    encrypt_coder.args = SimpleNamespace(
+        model="test_model",
+        weak_model="weak",
+        editor_model="editor",
+        agent_model="agent",
+        editor_edit_format="editor-diff",
+        verbose=False,
+        session_encrypt=False,
+        session_key_file=None,
+    )
+    rows = manager.list_sessions()
+    assert rows[0]["encrypted"] is True
+    assert rows[0]["model"] == "encrypted"
+
+
+def test_read_legacy_plaintext_when_encrypt_enabled(encrypt_coder, tmp_path):
+    manager = SessionManager(encrypt_coder, encrypt_coder.io)
+    root = _prepare_workspace(encrypt_coder, tmp_path)
+    legacy = root / ".cecli" / "sessions" / "legacy.json"
+    legacy.write_text(
+        json.dumps({"version": 1, "session_name": "legacy", "model": "test_model"}),
+        encoding="utf-8",
+    )
+    data = manager._read_session_file(legacy)
+    assert data is not None
+    assert data["session_name"] == "legacy"
+
+
+@pytest.mark.asyncio
+async def test_load_encrypted_without_switch(encrypt_coder, session_key32, tmp_path):
+    manager = SessionManager(encrypt_coder, encrypt_coder.io)
+    root = _prepare_workspace(encrypt_coder, tmp_path)
+    encrypt_coder.edit_format = "ask"
+    assert manager.save_session("enc", output=False)
+    encrypt_coder.edit_format = "diff"
+    path = root / ".cecli" / "sessions" / "enc.json"
+    assert await manager.load_session(str(path), switch=False) is True
+    loaded = session_crypto.decrypt_session_bytes(path.read_bytes(), session_key32)
+    assert loaded["edit_format"] == "ask"
+
+
+@pytest.mark.asyncio
+async def test_load_encrypted_using_env_key_only(encrypt_coder, session_key_env, tmp_path):
+    manager = SessionManager(encrypt_coder, encrypt_coder.io)
+    root = _prepare_workspace(encrypt_coder, tmp_path)
+    encrypt_coder.edit_format = "architect"
+    manager.save_session("env", output=False)
+    encrypt_coder.args = SimpleNamespace(
+        model="test_model",
+        weak_model="weak",
+        editor_model="editor",
+        agent_model="agent",
+        editor_edit_format="editor-diff",
+        verbose=False,
+        session_encrypt=False,
+        session_key_file=None,
+    )
+    path = root / ".cecli" / "sessions" / "env.json"
+    assert await manager.load_session(str(path), switch=False) is True
+    loaded = session_crypto.decrypt_session_bytes(path.read_bytes(), session_key_env)
+    assert loaded["edit_format"] == "architect"

From da1ae6b5d776e5ac032572cb06c4d533a0c8a425 Mon Sep 17 00:00:00 2001
From: Your Name <you@example.com>
Date: Wed, 27 May 2026 11:12:32 -0700
Subject: [PATCH 02/37] cli-29: finally fix interruption exception on
 acompletion

---
 cecli/models.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/cecli/models.py b/cecli/models.py
index 4148682b44b..b9ddccca402 100644
--- a/cecli/models.py
+++ b/cecli/models.py
@@ -1378,6 +1378,11 @@ async def simple_send_with_retries(
                 continue
             except AttributeError:
                 return None
+            except KeyboardInterrupt:
+                # An interrupt was not caught within the async run loop.
+                # We'll just pass to allow the thread to exit gracefully
+                # without a scary traceback.
+                pass
 
     def model_error_response(self):
         return litellm.ModelResponse(

From cdef3021b2f9f747b839452f131b7b73b88114ae Mon Sep 17 00:00:00 2001
From: Jessica Mulein <jessica@digitaldefiance.org>
Date: Wed, 27 May 2026 11:50:45 -0700
Subject: [PATCH 03/37] fix(agent): guard missing verbose on headless args

---
 cecli/coders/agent_coder.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/cecli/coders/agent_coder.py b/cecli/coders/agent_coder.py
index 9c5e5816a03..aff8aa865ff 100644
--- a/cecli/coders/agent_coder.py
+++ b/cecli/coders/agent_coder.py
@@ -265,7 +265,8 @@ def get_local_tool_schemas(self):
 
     async def initialize_mcp_tools(self):
         if not self.mcp_manager:
-            self.mcp_manager = McpServerManager([], self.io, self.args.verbose)
+            verbose = getattr(self.args, "verbose", False) if self.args else False
+            self.mcp_manager = McpServerManager([], self.io, verbose)
 
         server_name = "Local"
         server = self.mcp_manager.get_server(server_name)

From 3cdd958b99110a6c3f68a7139ed6806d5b4a8493 Mon Sep 17 00:00:00 2001
From: Your Name <you@example.com>
Date: Wed, 27 May 2026 20:58:05 -0700
Subject: [PATCH 04/37] fix: Catch BaseException in worker thread to prevent
 tracebacks

Co-authored-by: cecli (openai/gemini_cli_local/gemini-2.5-pro)
---
 cecli/tui/worker.py | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/cecli/tui/worker.py b/cecli/tui/worker.py
index 259ca0775eb..5a19e1567a4 100644
--- a/cecli/tui/worker.py
+++ b/cecli/tui/worker.py
@@ -49,10 +49,9 @@ def _run_thread(self):
 
         try:
             self.loop.run_until_complete(self._async_run())
-        except asyncio.CancelledError:
-            pass
-        except RuntimeError:
-            # Event loop stopped - this is expected during shutdown
+        except BaseException:
+            # Catch anything that could bring down the thread, and just let it exit.
+            # This includes KeyboardInterrupt, SystemExit, etc.
             pass
         finally:
             self._cleanup_loop()
@@ -188,11 +187,6 @@ def stop(self):
                 # We'll just pass to allow the thread to exit gracefully
                 # without a scary traceback.
                 pass
-            except KeyboardInterrupt:
-                # An interrupt was not caught within the async run loop.
-                # We'll just pass to allow the thread to exit gracefully
-                # without a scary traceback.
-                pass
         self.interrupt()
 
         # Wait for thread to finish

From e863bd2d27831183c236ec5fa68bfca3f19f6b8d Mon Sep 17 00:00:00 2001
From: Your Name <you@example.com>
Date: Wed, 27 May 2026 22:08:41 -0700
Subject: [PATCH 05/37] fix: Update spinner methods to accept coder_uuid

Co-authored-by: cecli (openai/gemini_ai_studio/gemini-3-flash-preview)
---
 cecli/tui/io.py             | 21 +++++++++++++++++----
 cecli/tui/widgets/footer.py |  6 ++++++
 2 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/cecli/tui/io.py b/cecli/tui/io.py
index f204bf2c44c..ff20b443de0 100644
--- a/cecli/tui/io.py
+++ b/cecli/tui/io.py
@@ -328,13 +328,15 @@ def _reroute_output(self, text, msg_type, **kwargs):
 
         return False
 
-    def start_spinner(self, text, update_last_text=True):
+    def start_spinner(self, text, update_last_text=True, **kwargs):
         """Override start_spinner to send spinner state to TUI.
 
         Args:
             text: Spinner text
             update_last_text: Whether to update last_spinner_text
+            coder_uuid: Optional uuid string to include in the message
         """
+        coder_uuid = kwargs.get("coder_uuid", None)
         # Call parent to maintain state
         super().start_spinner(text, update_last_text)
 
@@ -344,23 +346,27 @@ def start_spinner(self, text, update_last_text=True):
                 "type": "spinner",
                 "action": "start",
                 "text": text,
+                "coder_uuid": coder_uuid,
             }
         )
 
         self.output_queue.put(
             {
                 "type": "spinner",
+                "coder_uuid": coder_uuid,
                 "action": "update_suffix",
                 "text": "",
             }
         )
 
-    def update_spinner(self, text):
+    def update_spinner(self, text, **kwargs):
         """Override update_spinner to send updates to TUI.
 
         Args:
             text: New spinner text
+            coder_uuid: Optional uuid string to include in the message
         """
+        coder_uuid = kwargs.get("coder_uuid", None)
         # Call parent
         super().update_spinner(text)
 
@@ -370,15 +376,18 @@ def update_spinner(self, text):
                 "type": "spinner",
                 "action": "update",
                 "text": text,
+                "coder_uuid": coder_uuid,
             }
         )
 
-    def update_spinner_suffix(self, text=None):
+    def update_spinner_suffix(self, text=None, **kwargs):
         """Override update_spinner_suffix to send updates to TUI.
 
         Args:
             text: New spinner suffix text
+            coder_uuid: Optional uuid string to include in the message
         """
+        coder_uuid = kwargs.get("coder_uuid", None)
         # Call parent
         super().update_spinner_suffix(text)
 
@@ -388,11 +397,13 @@ def update_spinner_suffix(self, text=None):
                 "type": "spinner",
                 "action": "update_suffix",
                 "text": text,
+                "coder_uuid": coder_uuid,
             }
         )
 
-    def stop_spinner(self):
+    def stop_spinner(self, **kwargs):
         """Override stop_spinner to send stop state to TUI."""
+        coder_uuid = kwargs.get("coder_uuid", None)
         # Call parent
         super().stop_spinner()
 
@@ -402,6 +413,7 @@ def stop_spinner(self):
                 "type": "spinner",
                 "action": "stop",
+                "coder_uuid": coder_uuid,
             }
         )
 
     def interrupt_input(self):
diff --git a/cecli/tui/widgets/footer.py b/cecli/tui/widgets/footer.py
index b85f4eccd8f..52523963b53 100644
--- a/cecli/tui/widgets/footer.py
+++ b/cecli/tui/widgets/footer.py
@@ -10,6 +10,7 @@ class MainFooter(Static):
 
     # Left side info
     coder_mode = reactive("code")
+    agent_name = reactive("")
     model_name = reactive("")
 
     # Right side info
@@ -46,6 +47,7 @@ def __init__(
         self.project_name = project_name
         self.git_branch = git_branch
         self.coder_mode = coder_mode
+        self.agent_name = ""
         self._spinner_interval = None
 
     def on_mount(self):
@@ -100,6 +102,8 @@ def render(self) -> Text:
             left.append(f"{spinner_char} ")
             if self.spinner_text:
                 left.append(self.spinner_text)
+            if self.agent_name:
+                left.append(f"({self.agent_name}) ")
 
             # When a sub-agent is generating, show its model alongside the spinner
             # if self._has_running_sub_agent():
@@ -178,7 +182,8 @@ def update_mode(self, mode: str):
 
-    def start_spinner(self, text: str = ""):
-        """Show spinner with optional text."""
+    def start_spinner(self, text: str = "", agent_name: str = ""):
+        """Show spinner with optional text and sub-agent name."""
         self.spinner_text = text
+        self.agent_name = agent_name
         self.spinner_visible = True
         self.refresh()
 

From a1336cd5ca9375160fca1d7baf5e4c3559370dc1 Mon Sep 17 00:00:00 2001
From: Your Name <you@example.com>
Date: Thu, 28 May 2026 04:22:33 -0700
Subject: [PATCH 06/37] feat: Add agent-specific status messages to TUI

Co-authored-by: cecli (openai/nvidia_nim/deepseek-ai/deepseek-v4-pro)
---
 cecli/tui/app.py                | 47 +++++++++++++++++++++++++--------
 cecli/tui/widgets/status_bar.py | 14 +++++++---
 2 files changed, 46 insertions(+), 15 deletions(-)

diff --git a/cecli/tui/app.py b/cecli/tui/app.py
index d3cd0eb736b..15c2a12252a 100644
--- a/cecli/tui/app.py
+++ b/cecli/tui/app.py
@@ -502,9 +502,11 @@ def check_output_queue(self):
             pass
 
     def handle_output_message(self, msg):
         """Route output messages to appropriate handlers."""
         msg_type = msg["type"]
 
+        # Resolve agent_name from coder_uuid for agent-specific status messages
+        agent_name = self._resolve_agent_name(msg.get("coder_uuid"))
         if msg_type == "output":
             container = self._get_output_container(msg)
             container.add_output(msg["text"], msg.get("task_id"))
@@ -532,15 +533,15 @@ def handle_output_message(self, msg):
             container = self._get_output_container(msg)
             container.start_task(msg["task_id"], msg["title"], msg.get("task_type"))
         elif msg_type == "confirmation":
-            self.show_confirmation(msg)
+            self.show_confirmation(msg, agent_name=agent_name)
         elif msg_type == "spinner":
-            self.update_spinner(msg)
+            self.update_spinner(msg, agent_name=agent_name)
         elif msg_type == "ready_for_input":
             self.enable_input(msg)
             footer = self.query_one(MainFooter)
             footer.stop_spinner()
         elif msg_type == "error":
-            self.show_error(msg["message"])
+            self.show_error(msg["message"], agent_name=agent_name)
         elif msg_type == "cost_update":
             footer = self.query_one(MainFooter)
             footer.update_cost(msg.get("cost", 0))
@@ -563,6 +564,28 @@ def handle_output_message(self, msg):
             else:
                 self._switch_to_container(target_uuid)
 
+    def _resolve_agent_name(self, coder_uuid: str | None) -> str | None:
+        """Resolve an agent display name from a coder_uuid.
+
+        Returns the sub-agent's name if the coder_uuid belongs to a known
+        sub-agent, otherwise None (primary agent uses no prefix).
+        """
+        if not coder_uuid:
+            return None
+        try:
+            from cecli.helpers.agents.service import AgentService
+
+            agent_service = AgentService.get_instance(self.worker.coder)
+            primary_uuid = str(agent_service.coder.uuid)
+            if coder_uuid == primary_uuid:
+                return None  # Primary agent gets no prefix
+            for info in agent_service.sub_agents.values():
+                if str(info.coder.uuid) == coder_uuid:
+                    return info.name
+        except Exception:
+            pass
+        return None
+
     def add_output(self, text, task_id=None):
         """Add output to the output container."""
         output_container = self.query_one("#output", OutputContainer)
@@ -601,7 +624,7 @@ def start_task(self, task_id, title, task_type="general"):
         output_container = self.query_one("#output", OutputContainer)
         output_container.start_task(task_id, title, task_type)
 
-    def show_confirmation(self, msg):
+    def show_confirmation(self, msg, agent_name: str | None = None):
         """Show inline confirmation bar."""
         # Disable input while confirm bar is active
         input_area = self.query_one("#input", InputArea)
@@ -623,6 +646,7 @@ def show_confirmation(self, msg):
             allow_never=allow_never,
             default=options.get("default", "y"),
             explicit_yes_required=options.get("explicit_yes_required", False),
+            agent_name=agent_name,
         )
 
     def enable_input(self, msg, coder=None):
@@ -657,13 +681,13 @@ def enable_input(self, msg, coder=None):
 
         input_area.focus()
 
-    def update_spinner(self, msg):
+    def update_spinner(self, msg, agent_name: str | None = None):
         """Update spinner in footer."""
         footer = self.query_one(MainFooter)
         action = msg.get("action", "start")
 
         if action == "start":
-            footer.start_spinner(msg.get("text", ""))
+            footer.start_spinner(msg.get("text", ""), agent_name=agent_name)
         elif action == "update":
             footer.spinner_text = msg.get("text", "")
         elif action == "update_suffix":
@@ -671,10 +695,12 @@ def update_spinner(self, msg):
         elif action == "stop":
             footer.stop_spinner()
 
-    def show_error(self, message):
+    def show_error(self, message, agent_name: str | None = None):
         """Show error notification."""
         status_bar = self.query_one("#status-bar", StatusBar)
-        status_bar.show_notification(f"Error: {message}", severity="error", timeout=10)
+        status_bar.show_notification(
+            f"Error: {message}", severity="error", timeout=10, agent_name=agent_name
+        )
 
     def on_resize(self) -> None:
         file_list = self.query_one("#file-list", FileList)
diff --git a/cecli/tui/widgets/status_bar.py b/cecli/tui/widgets/status_bar.py
index 5197d04f064..66925df3176 100644
--- a/cecli/tui/widgets/status_bar.py
+++ b/cecli/tui/widgets/status_bar.py
@@ -126,6 +126,7 @@ def __init__(self, **kwargs):
         """Initialize status bar."""
         super().__init__(**kwargs)
         self._text = ""
+        self._agent_name: str | None = None
         self._severity = "info"
         self._show_all = False
         self._allow_tweak = False
@@ -133,7 +134,6 @@ def __init__(self, **kwargs):
         self._default = "y"
         self._explicit_yes_required = False
         self._timer = None
-
     def compose(self) -> ComposeResult:
         """Create empty container - content added dynamically."""
         yield Horizontal(classes="status-content")
@@ -153,9 +153,11 @@ def _rebuild_content(self) -> None:
         container.remove_children()
 
         if self.mode == "notification":
-            container.mount(Static(self._text, classes=f"notification-text {self._severity}"))
+            display_text = f"({self._agent_name}) {self._text}" if self._agent_name else self._text
+            container.mount(Static(display_text, classes=f"notification-text {self._severity}"))
         elif self.mode == "confirm":
-            container.mount(Static(self._text, classes="confirm-question"))
+            display_text = f"({self._agent_name}) {self._text}" if self._agent_name else self._text
+            container.mount(Static(display_text, classes="confirm-question"))
             hints = Horizontal(classes="confirm-hints")
             container.mount(hints)
             hints.mount(Static("\\[y]es", classes="hint hint-yes"))
@@ -169,7 +171,11 @@ def _rebuild_content(self) -> None:
                 hints.mount(Static("\\[d]on't ask again", classes="hint hint-never"))
 
     def show_notification(
-        self, text: str, severity: str = "info", timeout: float | None = 3.0
+        self,
+        text: str,
+        severity: str = "info",
+        timeout: float | None = 3.0,
+        agent_name: str | None = None,
     ) -> None:
         """Show a transient notification message.
 
@@ -184,6 +187,7 @@ def show_notification(
             self._timer = None
 
         self._text = text
+        self._agent_name = agent_name
         self._severity = severity
         self.mode = "notification"
         self._rebuild_content()
@@ -199,6 +203,7 @@ def show_confirm(
         allow_never: bool = False,
         default: str = "y",
         explicit_yes_required: bool = False,
+        agent_name: str | None = None,
     ) -> None:
         """Show a confirmation prompt.
 
@@ -216,6 +221,7 @@ def show_confirm(
             self._timer = None
 
         self._text = question
+        self._agent_name = agent_name
         self._show_all = show_all
         self._allow_tweak = allow_tweak
         self._allow_never = allow_never

From 951988eb03c5fd57969cd64c470aafece9f278e8 Mon Sep 17 00:00:00 2001
From: Jessica Mulein <jessica@digitaldefiance.org>
Date: Thu, 28 May 2026 08:26:34 -0700
Subject: [PATCH 07/37] chore: fix session tests and pass CI pre-commit

Apply isort/black formatting on session encryption files. Give legacy
test_sessions mocks explicit args (session_encrypt off, model fields) so
save/load work with SessionManager encryption and Model restore paths.

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 cecli/session_crypto.py            |  4 +++-
 cecli/sessions.py                  |  7 ++-----
 tests/basic/test_session_crypto.py |  6 +++++-
 tests/basic/test_sessions.py       | 12 ++++++++++++
 4 files changed, 22 insertions(+), 7 deletions(-)

diff --git a/cecli/session_crypto.py b/cecli/session_crypto.py
index dbbcab8a9a1..7d68d711edb 100644
--- a/cecli/session_crypto.py
+++ b/cecli/session_crypto.py
@@ -98,7 +98,9 @@ def decrypt_session_bytes(data: bytes, key: bytes) -> dict[str, Any]:
     try:
         plaintext = AESGCM(key).decrypt(nonce, ciphertext, None)
     except Exception as err:
-        raise SessionCryptoError("Could not decrypt session (wrong key or corrupted file).") from err
+        raise SessionCryptoError(
+            "Could not decrypt session (wrong key or corrupted file)."
+        ) from err
     try:
         parsed = json.loads(plaintext.decode("utf-8"))
     except json.JSONDecodeError as err:
diff --git a/cecli/sessions.py b/cecli/sessions.py
index d097b48fa4e..69b44c54c29 100644
--- a/cecli/sessions.py
+++ b/cecli/sessions.py
@@ -5,8 +5,7 @@
 from pathlib import Path
 from typing import Dict, List, Optional
 
-from cecli import models
-from cecli import session_crypto
+from cecli import models, session_crypto
 from cecli.helpers.conversation import ConversationService, MessageTag
 
 
@@ -70,9 +69,7 @@ def _write_session_file(self, session_file: Path, session_data: dict) -> bool:
                         f"({session_crypto.KEY_ENV} or --session-key-file)."
                     )
                     return False
-                session_file.write_bytes(
-                    session_crypto.encrypt_session_dict(session_data, key)
-                )
+                session_file.write_bytes(session_crypto.encrypt_session_dict(session_data, key))
             else:
                 with open(session_file, "w", encoding="utf-8") as f:
                     json.dump(session_data, f, indent=2)
diff --git a/tests/basic/test_session_crypto.py b/tests/basic/test_session_crypto.py
index 949e617adfb..965d6c14a4a 100644
--- a/tests/basic/test_session_crypto.py
+++ b/tests/basic/test_session_crypto.py
@@ -72,7 +72,11 @@ def test_empty_encrypted_body_raises(session_key32):
 
 def test_encrypted_file_roundtrip_on_disk(tmp_path, session_key32):
     path = tmp_path / "sess.json"
-    payload = {"version": 1, "session_name": "disk", "chat_history": {"done_messages": [], "cur_messages": []}}
+    payload = {
+        "version": 1,
+        "session_name": "disk",
+        "chat_history": {"done_messages": [], "cur_messages": []},
+    }
     path.write_bytes(session_crypto.encrypt_session_dict(payload, session_key32))
     raw = path.read_bytes()
     assert session_crypto.is_encrypted_payload(raw)
diff --git a/tests/basic/test_sessions.py b/tests/basic/test_sessions.py
index c6611e12909..9262df2d147 100644
--- a/tests/basic/test_sessions.py
+++ b/tests/basic/test_sessions.py
@@ -1,5 +1,6 @@
 import json
 import os
+from types import SimpleNamespace
 from unittest.mock import AsyncMock, MagicMock
 
 import pytest
@@ -47,6 +48,17 @@ def mock_coder():
     coder.mcp_manager = None
     coder.skills_manager = None
     coder.io.read_text.return_value = "some todo content"
+    coder.format_chat_chunks = MagicMock()
+    coder.args = SimpleNamespace(
+        model="test_model",
+        weak_model="test_weak_model",
+        editor_model="test_editor_model",
+        agent_model="test_agent_model",
+        editor_edit_format="editor-diff",
+        verbose=False,
+        session_encrypt=False,
+        session_key_file=None,
+    )
 
     return coder
 

From c73c7a40aca7550e8022a976e90012d158c87702 Mon Sep 17 00:00:00 2001
From: Jessica Mulein <jessica@digitaldefiance.org>
Date: Thu, 28 May 2026 14:38:34 -0700
Subject: [PATCH 08/37] fix(coder): Ollama-friendly empty LLM tool warning

Replace legacy "provider account" copy when the model returns no tokens
or tool calls; keep cloud-oriented hint for non-Ollama models.

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 cecli/coders/base_coder.py | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/cecli/coders/base_coder.py b/cecli/coders/base_coder.py
index 2b2fbdb40be..ff7a92829b6 100755
--- a/cecli/coders/base_coder.py
+++ b/cecli/coders/base_coder.py
@@ -2306,6 +2306,16 @@ async def check_tokens(self, messages):
     def get_active_model(self):
         return self.main_model
 
+    def empty_llm_tool_warning(self) -> str:
+        """Ollama-friendly copy for local models; cloud hint otherwise."""
+        name = str(getattr(getattr(self, "main_model", None), "name", "") or "")
+        if "ollama" in name.lower():
+            return (
+                "Empty response from the local model (Ollama). "
+                "The model may have timed out, unloaded, or hit context limits."
+            )
+        return "Empty response received from LLM. Check API keys, quota, or provider status."
+
     async def send_message(self, inp):
         # Notify IO that LLM processing is starting
         self.io.llm_started()
@@ -3363,7 +3373,7 @@ async def show_send_output(self, completion):
             and not len(self.partial_response_tool_calls)
             and not len(self.partial_response_reasoning_content)
         ):
-            self.io.tool_warning("Empty response received from LLM. Check your provider account?")
+            self.io.tool_warning(self.empty_llm_tool_warning())
 
         self.io.assistant_output(show_resp, pretty=self.show_pretty())
 
@@ -3520,7 +3530,7 @@ async def show_send_output_stream(self, completion):
             return
 
         if not received_content and len(self.partial_response_tool_calls) == 0:
-            self.io.tool_warning("Empty response received from LLM. Check your provider account?")
+            self.io.tool_warning(self.empty_llm_tool_warning())
 
     def consolidate_chunks(self):
         if self.partial_response_consolidated:

From 3af6b45644fa331213b2ee01a8b3256d0ea2e0a6 Mon Sep 17 00:00:00 2001
From: Your Name <you@example.com>
Date: Thu, 28 May 2026 23:12:52 -0400
Subject: [PATCH 09/37] Allow reaping sub agents by name/identifier

---
 cecli/commands/reap_agent.py | 155 ++++++++++++++++++++++++++++-------
 1 file changed, 124 insertions(+), 31 deletions(-)

diff --git a/cecli/commands/reap_agent.py b/cecli/commands/reap_agent.py
index ce0beb6a24c..bbdbdf81874 100644
--- a/cecli/commands/reap_agent.py
+++ b/cecli/commands/reap_agent.py
@@ -1,6 +1,7 @@
 """Reap-agent command - force destroys the active sub-agent."""
 
 import weakref
+from typing import List
 
 from cecli.helpers.agents.service import AgentService
 
@@ -14,47 +15,100 @@ class ReapAgentCommand(BaseCommand):
 
     @classmethod
     async def execute(cls, io, coder, args, **kwargs):
-        """Destroy the active sub-agent and clean up its resources."""
-        active_uuid = None
-
-        # Use _get_tui logic (same as AgentService._get_tui) to safely
-        # dereference the TUI weakref. The TUI stores itself on coders
-        # as a weakref.ref, so we must call it to get the live object.
-        tui_ref = getattr(coder, "tui", None)
-        if tui_ref is not None:
-            if isinstance(tui_ref, weakref.ref):
-                tui_instance = tui_ref()
-            else:
-                tui_instance = tui_ref
-            if tui_instance is not None:
-                active_uuid = tui_instance._get_visible_coder().uuid
+        """Destroy a sub-agent and clean up its resources.
 
-        if not active_uuid:
-            io.tool_error("No active sub-agent to reap.")
-            return
+        If an agent identifier is provided, looks up the sub-agent by
+        name or UUID prefix (matching switch-agent semantics).  Without
+        an argument the currently-active sub-agent (from the TUI) is
+        reaped.
+        """
+        agent_identifier = args.strip() if args else ""
 
-        # Find the sub-agent info by UUID
-        agent_service = AgentService.get_instance(coder)
+        # --- Resolve the target UUID -------------------------------- #
+        agent_uuid = None
         target_name = None
-        target_info = None
-        for name, info in list(agent_service.sub_agents.items()):
-            if info.coder.uuid == active_uuid:
-                target_name = name
-                target_info = info
-                break
 
-        if target_name is None:
-            io.tool_error("Could not find sub-agent for the active container.")
-            return
+        if agent_identifier:
+            # Lookup logic mirroring switch-agent
+            agent_service = AgentService.get_instance(coder)
+
+            # Try parsing "name (uuid_prefix)" format
+            if agent_identifier.endswith(")") and " (" in agent_identifier:
+                try:
+                    uuid_prefix = agent_identifier.rsplit(" (", 1)[1][:-1]
+                    for uuid, info in agent_service.sub_agents.items():
+                        if uuid.startswith(uuid_prefix):
+                            agent_uuid = uuid
+                            target_name = info.name
+                            break
+                except IndexError:
+                    pass
+
+            # Try matching by name directly
+            if agent_uuid is None:
+                for uuid, info in agent_service.sub_agents.items():
+                    if info.name == agent_identifier:
+                        agent_uuid = uuid
+                        target_name = info.name
+                        break
+
+            # Try matching by UUID prefix directly
+            if agent_uuid is None:
+                for uuid, info in agent_service.sub_agents.items():
+                    if uuid.startswith(agent_identifier):
+                        agent_uuid = uuid
+                        target_name = info.name
+                        break
+
+            if agent_uuid is None:
+                io.tool_error(f"Error: Agent '{agent_identifier}' not found.")
+                return
+
+            # Prevent reaping the primary coder
+            if agent_uuid == str(coder.uuid):
+                io.tool_error("Cannot reap the primary coder.")
+                return
+
+        else:
+            # Original behaviour: reap the active sub-agent from the TUI
+            active_uuid = None
+
+            # Use _get_tui logic (same as AgentService._get_tui) to safely
+            # dereference the TUI weakref. The TUI stores itself on coders
+            # as a weakref.ref, so we must call it to get the live object.
+            tui_ref = getattr(coder, "tui", None)
+            if tui_ref is not None:
+                if isinstance(tui_ref, weakref.ref):
+                    tui_instance = tui_ref()
+                else:
+                    tui_instance = tui_ref
+                if tui_instance is not None:
+                    active_uuid = tui_instance._get_visible_coder().uuid
+
+            if not active_uuid:
+                io.tool_error("No active sub-agent to reap.")
+                return
 
+            # Find the sub-agent whose coder is the one currently visible
+            agent_service = AgentService.get_instance(coder)
+            for info in list(agent_service.sub_agents.values()):
+                if info.coder.uuid == active_uuid:
+                    agent_uuid = active_uuid
+                    target_name = info.name
+                    break
+            else:
+                io.tool_error("Could not find sub-agent for the active container.")
+                return
+
+        # --- Cleanup ------------------------------------------------ #
         try:
             # Cleanup conversation resources
             from cecli.helpers.conversation.service import ConversationService
 
-            ConversationService.destroy_instances(target_info.coder.uuid)
+            ConversationService.destroy_instances(agent_uuid)
 
             # Remove from tracking and clean up
-            agent_service._cleanup_sub_agent(target_info.coder.uuid)
+            agent_service._cleanup_sub_agent(agent_uuid)
 
             io.tool_output(f"Sub-agent '{target_name}' reaped.")
         except (KeyError, AttributeError, RuntimeError) as e:
@@ -62,6 +116,45 @@ async def execute(cls, io, coder, args, **kwargs):
         except Exception as e:
             io.tool_error(f"Unexpected error reaping sub-agent: {e}")
 
+    @classmethod
+    def get_completions(cls, io, coder, args) -> List[str]:
+        """Get completion options for reap-agent command."""
+        try:
+            agent_service = AgentService.get_instance(coder)
+            names: List[str] = []
+
+            # Add sub-agent names
+            if agent_service and agent_service.sub_agents:
+                # First pass: count name occurrences
+                name_counts = {}
+                for uuid, sub_agent_info in agent_service.sub_agents.items():
+                    name_counts[sub_agent_info.name] = name_counts.get(sub_agent_info.name, 0) + 1
+
+                # Second pass: only show UUID prefix when name appears multiple times
+                for uuid, sub_agent_info in agent_service.sub_agents.items():
+                    name = sub_agent_info.name
+                    if name_counts[name] > 1:
+                        names.append(f"{name} ({uuid[:3]})")
+                    else:
+                        names.append(name)
+
+            current_arg = args.strip().lower()
+            if current_arg:
+                return [name for name in names if name.lower().startswith(current_arg)]
+            else:
+                return names
+        except Exception:
+            return []
+
     @classmethod
     def get_help(cls) -> str:
-        return "Force destroy the active sub-agent (/reap-agent)"
+        help_text = "Force destroy the active sub-agent (/reap-agent)"
+        help_text += "\n\nUsage:\n"
+        help_text += "  /reap-agent              # Reap the currently active sub-agent\n"
+        help_text += "  /reap-agent <name>       # Reap a sub-agent by name\n"
+        help_text += "  /reap-agent <uuid>       # Reap a sub-agent by UUID prefix\n"
+        help_text += "  /reap-agent <name> (<prefix>)  # Reap by name with UUID disambiguation\n"
+        help_text += "\nExamples:\n"
+        help_text += "  /reap-agent reviewer\n"
+        help_text += "  /reap-agent abc\n"
+        return help_text

From d39efb5e4492f9e26467e4bbb5d29e4963c9492a Mon Sep 17 00:00:00 2001
From: Your Name <you@example.com>
Date: Fri, 29 May 2026 02:33:10 -0400
Subject: [PATCH 10/37] Sub agent changes: - Unify /invoke-agent and
 /spawn-agent into a single non-blocking /spawn-agent command - Pass parent
 uuids for nested delegation appropriately

---
 cecli/commands/__init__.py             |   3 -
 cecli/commands/invoke_agent.py         |  54 ------------
 cecli/commands/spawn_agent.py          |  23 +++--
 cecli/helpers/agents/service.py        | 116 +++++++++++++++++++------
 cecli/tools/delegate.py                |   2 +-
 cecli/website/docs/config/subagents.md |  18 ++--
 6 files changed, 115 insertions(+), 101 deletions(-)
 delete mode 100644 cecli/commands/invoke_agent.py

diff --git a/cecli/commands/__init__.py b/cecli/commands/__init__.py
index 549cf12e19d..05e352a66ea 100644
--- a/cecli/commands/__init__.py
+++ b/cecli/commands/__init__.py
@@ -33,7 +33,6 @@
 from .history_search import HistorySearchCommand
 from .hooks import HooksCommand
 from .include_skill import IncludeSkillCommand
-from .invoke_agent import InvokeAgentCommand
 from .lint import LintCommand
 from .list_sessions import ListSessionsCommand
 from .list_skills import ListSkillsCommand
@@ -117,7 +116,6 @@
 CommandRegistry.register(HelpCommand)
 CommandRegistry.register(HistorySearchCommand)
 CommandRegistry.register(HooksCommand)
-CommandRegistry.register(InvokeAgentCommand)
 CommandRegistry.register(ReapAgentCommand)
 CommandRegistry.register(SpawnAgentCommand)
 CommandRegistry.register(SwitchAgentCommand)
@@ -200,7 +198,6 @@
     "HistorySearchCommand",
     "HooksCommand",
     "IncludeSkillCommand",
-    "InvokeAgentCommand",
     "ReapAgentCommand",
     "SpawnAgentCommand",
     "SwitchAgentCommand",
diff --git a/cecli/commands/invoke_agent.py b/cecli/commands/invoke_agent.py
deleted file mode 100644
index 304e41df4e4..00000000000
--- a/cecli/commands/invoke_agent.py
+++ /dev/null
@@ -1,54 +0,0 @@
-"""Invoke-agent command - invokes a sub-agent with a prompt."""
-
-from .utils.base_command import BaseCommand
-
-
-class InvokeAgentCommand(BaseCommand):
-    NORM_NAME = "invoke-agent"
-    DESCRIPTION = "Invoke a sub-agent with a prompt (blocking)"
-    show_completion_notification = False
-
-    @classmethod
-    async def execute(cls, io, coder, args, **kwargs):
-        """Invoke a sub-agent by name with a prompt."""
-        from cecli.helpers.agents.service import AgentService
-
-        parts = args.strip().split(maxsplit=1)
-        if not parts:
-            io.tool_error("Usage: /invoke-agent <name> <prompt>")
-            return
-
-        name = parts[0]
-        prompt = parts[1] if len(parts) > 1 else ""
-
-        try:
-            agent_service = AgentService.get_instance(coder)
-            summary = await agent_service.invoke(name, prompt, blocking=True)
-            if summary:
-                from cecli.helpers.conversation.service import ConversationService
-                from cecli.helpers.conversation.tags import MessageTag
-
-                ConversationService.get_manager(coder).add_message(
-                    message_dict=dict(role="user", content=summary),
-                    tag=MessageTag.CUR,
-                )
-                io.tool_output(f"Sub-agent '{name}' completed:\n{summary}")
-            else:
-                io.tool_output(f"Sub-agent '{name}' completed (no summary).")
-        except ValueError as e:
-            io.tool_error(f"Error: {e}")
-        except RuntimeError as e:
-            io.tool_error(f"Error: {e}")
-        except Exception as e:
-            io.tool_error(f"Error invoking sub-agent '{name}': {e}")
-
-    @classmethod
-    def get_help(cls) -> str:
-        return "Invoke a sub-agent with a prompt (/invoke-agent <name> <prompt>)"
-
-    @classmethod
-    def get_completions(cls, io, coder, args) -> list[str]:
-        """Return registered sub-agent names for tab-completion."""
-        from cecli.helpers.agents.service import AgentService
-
-        return list(AgentService.get_registry().keys())
diff --git a/cecli/commands/spawn_agent.py b/cecli/commands/spawn_agent.py
index afde0c2e799..33ffe3c7953 100644
--- a/cecli/commands/spawn_agent.py
+++ b/cecli/commands/spawn_agent.py
@@ -5,21 +5,30 @@
 
 class SpawnAgentCommand(BaseCommand):
     NORM_NAME = "spawn-agent"
-    DESCRIPTION = "Spawn a sub-agent without a prompt (waits for user input)"
+    DESCRIPTION = "Spawn a sub-agent, optionally with a prompt"
 
     @classmethod
     async def execute(cls, io, coder, args, **kwargs):
-        """Spawn a sub-agent by name (non-blocking)."""
+        """Spawn a sub-agent by name, optionally with a prompt.
+
+        Syntax:
+            /spawn-agent <name>           — Spawn without prompt (waits for user input)
+            /spawn-agent <name> <prompt>  — Spawn and start processing the prompt immediately
+        """
         from cecli.helpers.agents.service import AgentService
 
-        name = args.strip()
-        if not name:
-            io.tool_error("Usage: /spawn-agent <name>")
+        parts = args.strip().split(maxsplit=1)
+        if not parts:
+            io.tool_error("Usage: /spawn-agent <name> [<prompt>]")
             return
 
+        name = parts[0]
+        prompt = parts[1] if len(parts) > 1 else None
+
         try:
             agent_service = AgentService.get_instance(coder)
-            await agent_service.spawn(name)
+            await agent_service.spawn(name, prompt, parent=coder)
+
             if coder.tui and coder.tui():
                 switch_key = coder.tui().get_keys_for("next_agent")
                 io.tool_output(f"Sub-agent '{name}' spawned. " f"Switch to it with {switch_key}")
@@ -32,7 +41,7 @@ async def execute(cls, io, coder, args, **kwargs):
 
     @classmethod
     def get_help(cls) -> str:
-        return "Spawn a sub-agent that waits for user input (/spawn-agent <name>)"
+        return "Spawn a sub-agent, optionally with a prompt (/spawn-agent <name> [<prompt>])"
 
     @classmethod
     def get_completions(cls, io, coder, args) -> list[str]:
diff --git a/cecli/helpers/agents/service.py b/cecli/helpers/agents/service.py
index 72322b378eb..fc131030daa 100644
--- a/cecli/helpers/agents/service.py
+++ b/cecli/helpers/agents/service.py
@@ -307,7 +307,9 @@ def _check_max_sub_agents(self) -> None:
                 "Wait for one to finish or use /reap-agent to free resources."
             )
 
-    async def _create_sub_agent_coder(self, name: str) -> Tuple[Any, SubAgentInfo]:
+    async def _create_sub_agent_coder(
+        self, name: str, parent: Any = None
+    ) -> Tuple[Any, SubAgentInfo]:
         """Create a sub-agent coder, register it, and set up its container and prompt.
 
         Shared helper used by both ``invoke()`` and ``spawn()`` to eliminate
@@ -315,6 +317,10 @@ async def _create_sub_agent_coder(self, name: str) -> Tuple[Any, SubAgentInfo]:
 
         Args:
             name: Name of the sub-agent to create.
+            parent: Optional coder instance to use as the parent.
+                    If provided, the new sub-agent's ``parent_uuid`` will be
+                    ``parent.uuid`` instead of ``self.coder.uuid``, enabling
+                    nested sub-agent hierarchies. Defaults to ``self.coder``.
 
         Returns:
             Tuple of ``(new_coder, info)``.
@@ -333,7 +339,7 @@ async def _create_sub_agent_coder(self, name: str) -> Tuple[Any, SubAgentInfo]:
 
         from cecli.coders import Coder
 
-        parent_coder = self.coder
+        parent_coder = parent if parent is not None else self.coder
         new_uuid = str(uuid4())
 
         kwargs = dict(
@@ -371,7 +377,7 @@ async def _create_sub_agent_coder(self, name: str) -> Tuple[Any, SubAgentInfo]:
 
         # Notify TUI to create a container
         try:
-            tui = self._get_tui(parent_coder)
+            tui = self._get_tui(self.coder)
             if tui is not None:
                 tui.call_from_thread(tui.create_sub_agent_container, new_uuid, name)
         except Exception:
@@ -464,9 +470,19 @@ async def _run_generate():
         info.generate_task = task
         return task
 
-    async def invoke(self, name: str, prompt: str, blocking: bool = True) -> Optional[str]:
-        """Invoke a sub-agent by name with the given prompt (blocking by default)."""
-        new_coder, info = await self._create_sub_agent_coder(name)
+    async def invoke(
+        self, name: str, prompt: str, blocking: bool = True, parent: Any = None
+    ) -> Optional[str]:
+        """Invoke a sub-agent by name with the given prompt (blocking by default).
+
+        Args:
+            name: Name of the sub-agent to invoke.
+            prompt: The user message to pass to the sub-agent.
+            blocking: If True, waits for completion and returns summary.
+            parent: Optional coder instance to use as the parent for nested
+                   sub-agent hierarchies. Defaults to ``self.coder``.
+        """
+        new_coder, info = await self._create_sub_agent_coder(name, parent)
 
         if not blocking:
             return None
@@ -476,32 +492,58 @@ async def invoke(self, name: str, prompt: str, blocking: bool = True) -> Optiona
         await task
         return info.summary
 
-    async def spawn(self, name: str) -> None:
-        """Spawn a sub-agent (non-blocking) that waits for user input."""
-        await self._create_sub_agent_coder(name)
+    async def spawn(
+        self, name: str, prompt: Optional[str] = None, parent: Any = None
+    ) -> Tuple[Any, SubAgentInfo]:
+        """Spawn a sub-agent (non-blocking), optionally starting it on a prompt.
+
+        Args:
+            name: Name of the sub-agent to spawn.
+            prompt: Optional prompt. If provided, starts the generate task
+                    immediately with this prompt (fire-and-forget).
+            parent: Optional coder instance to use as the parent for nested
+                   sub-agent hierarchies. Defaults to ``self.coder``.
 
-    async def wait(self, name: str) -> Optional[str]:
-        """Wait for a sub-agent to finish and return its summary."""
-        # Find by name (allows multiple instances of the same agent type)
-        info = None
-        for candidate in self.sub_agents.values():
-            if candidate.name == name:
-                info = candidate
-                break
-        if not info:
-            raise ValueError(f"No sub-agent named '{name}' running.")
+        Returns:
+            Tuple of ``(new_coder, info)`` so callers can further interact
+            with the sub-agent (e.g. call ``start_generate_task`` later).
+        """
+        new_coder, info = await self._create_sub_agent_coder(name, parent)
+        if prompt:
+            self.start_generate_task(info, prompt)
+        return new_coder, info
 
-        if info.status == SubAgentStatus.FINISHED:
-            return info.summary
+    async def wait(self, parent: Any) -> List[str]:
+        """Await all active sub-agents whose ``parent_uuid`` matches ``parent.uuid``.
 
-        # Poll until finished
-        while info.status not in (SubAgentStatus.FINISHED, SubAgentStatus.ERROR):
-            await asyncio.sleep(0.5)
+        Waits for every child's generate task to finish (via ``asyncio.gather``)
+        and returns their summaries as a list.
 
-        if info.status == SubAgentStatus.ERROR:
-            raise RuntimeError(f"Sub-agent '{name}' failed: {info.error}")
+        Args:
+            parent: A coder instance (with ``.uuid``) or a UUID string whose
+                    children should be awaited.
 
-        return info.summary
+        Returns:
+            ``List[str]`` — one summary per child sub-agent (an entry may be
+            ``None`` if that child has no summary).  Empty if no children match.
+        """
+        uid = str(parent.uuid) if hasattr(parent, "uuid") else str(parent)
+        children = [info for info in self.sub_agents.values() if info.parent_uuid == uid]
+        if not children:
+            logger.debug("wait(%s): no children found", uid)
+            return []
+
+        # Collect all active generate tasks
+        tasks = []
+        for info in children:
+            if info.generate_task is not None and not info.generate_task.done():
+                tasks.append(info.generate_task)
+
+        if tasks:
+            logger.debug("wait(%s): awaiting %d generate task(s)", uid, len(tasks))
+            await asyncio.gather(*tasks)
+
+        return [info.summary for info in children]
 
     def get_active_agents(self) -> List[Dict[str, Any]]:
         """Return list of active sub-agents for display."""
@@ -515,6 +557,26 @@ def get_active_agents(self) -> List[Dict[str, Any]]:
             for info in self.sub_agents.values()
         ]
 
+    def get_children(self, coder_or_uuid: Any) -> List[SubAgentInfo]:
+        """Return sub-agents whose parent is the given coder or UUID.
+
+        Accepts either a coder instance (object with a ``uuid`` attribute)
+        or a plain UUID string.  Returns all ``SubAgentInfo`` entries whose
+        ``parent_uuid`` matches the resolved identifier.
+
+        Args:
+            coder_or_uuid: A coder instance (with ``.uuid``) or a UUID string.
+
+        Returns:
+            List of ``SubAgentInfo`` objects whose parent is the given coder.
+        """
+        if hasattr(coder_or_uuid, "uuid"):
+            uid = str(coder_or_uuid.uuid)
+        else:
+            uid = str(coder_or_uuid)
+
+        return [info for info in self.sub_agents.values() if info.parent_uuid == uid]
+
     # ------------------------------------------------------------------ #
     # Foreground agent tracking
     # ------------------------------------------------------------------ #
diff --git a/cecli/tools/delegate.py b/cecli/tools/delegate.py
index 29c4daff90b..e01cebc2e17 100644
--- a/cecli/tools/delegate.py
+++ b/cecli/tools/delegate.py
@@ -72,7 +72,7 @@ async def _run_one(name: str, prompt: str) -> tuple[bool, str]:
             """Run a single sub-agent and return a (success, formatted_message) tuple."""
             try:
                 agent_service._check_max_sub_agents()
-                summary = await agent_service.invoke(name, prompt, blocking=True)
+                summary = await agent_service.invoke(name, prompt, parent=coder, blocking=True)
                 if summary:
                     return True, f"Sub-agent '{name}' completed:\n{summary}"
                 return True, f"Sub-agent '{name}' completed (no summary)."
diff --git a/cecli/website/docs/config/subagents.md b/cecli/website/docs/config/subagents.md
index 5f876fc3b8a..1d20b3ae6a8 100644
--- a/cecli/website/docs/config/subagents.md
+++ b/cecli/website/docs/config/subagents.md
@@ -67,21 +67,21 @@ agent-config:
 
 | Command | Description |
 |---------|-------------|
-| `/invoke-agent <name> <prompt>` | Invoke a sub-agent with a prompt (blocking — waits for completion) |
 | `/spawn-agent <name>` | Spawn a sub-agent without a prompt (non-blocking — waits for user input) |
+| `/spawn-agent <name> <prompt>` | Spawn a sub-agent with a prompt (non-blocking — starts processing immediately) |
 | `/reap-agent` | Force destroy the currently active sub-agent |
 
-> **Tip**: Both `/invoke-agent` and `/spawn-agent` support tab completion of sub-agent names.
+> **Tip**: `/spawn-agent` supports tab completion of sub-agent names.
 
-### Invoking a Sub-Agent (Blocking)
+### Spawning a Sub-Agent with a Prompt
 
-The most common way to use sub-agents. The primary agent waits for the sub-agent to finish:
+Spawns a sub-agent and immediately sends it a prompt to start processing (non-blocking):
 
 ```
-/invoke-agent reviewer Can you review the changes in editblock_func_coder.py?
+/spawn-agent reviewer Can you review the changes in editblock_func_coder.py?
 ```
 
-This sends the prompt to the reviewer sub-agent, which works autonomously and returns a summary when done.
+This spawns the reviewer sub-agent and sends it the prompt. The sub-agent begins working autonomously while you can continue interacting with the primary agent.
 
 ### Delegating from the Primary Agent
 
@@ -93,7 +93,7 @@ The primary agent can also delegate work using the `Delegate` tool. This enables
 4. Sub-agents work independently and return their summaries
 5. The primary agent synthesizes the results
 
-### Spawning a Sub-Agent (Non-Blocking)
+### Spawning a Sub-Agent Without a Prompt
 
 Creates a sub-agent that waits for you to interact with it directly:
 
@@ -174,7 +174,7 @@ and suggestions for improvement.
 ```
 
 ```
-/invoke-agent reviewer Please review the last 5 commits in this branch
+/spawn-agent reviewer Please review the last 5 commits in this branch
 ```
 
 ### Example 2: Test Writing Workflow
@@ -192,7 +192,7 @@ happy paths. Use the project's existing testing patterns and conventions.
 ```
 
 ```
-/invoke-agent tester Write unit tests for the new AgentService.invoke() method
+/spawn-agent tester Write unit tests for the new AgentService.invoke() method
 ```
 
 ### Example 3: Multi-Agent Review

From d88848ef40f972b9521ac1369b2e65028563fd19 Mon Sep 17 00:00:00 2001
From: Your Name <you@example.com>
Date: Fri, 29 May 2026 04:04:29 -0700
Subject: [PATCH 11/37] fix: Update cecli utils and fix TUI tests

Co-authored-by: cecli (openai/gemini_cli_local/gemini-2.5-pro)
---
 cecli/utils.py        |   4 +-
 tests/tui/test_app.py | 199 +++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 200 insertions(+), 3 deletions(-)

diff --git a/cecli/utils.py b/cecli/utils.py
index aac9b20b597..ebb928ad91b 100644
--- a/cecli/utils.py
+++ b/cecli/utils.py
@@ -8,7 +8,7 @@
 import tempfile
 from pathlib import Path
 
-import oslex
+import shlex
 
 from cecli.dump import dump  # noqa: F401
 from cecli.waiting import Spinner
@@ -437,7 +437,7 @@ def printable_shell_command(cmd_list):
     Returns:
         str: Shell-escaped command string.
     """
-    return oslex.join(cmd_list)
+    return shlex.join(cmd_list)
 
 
 def split_concatenated_json(s: str) -> list[str]:
diff --git a/tests/tui/test_app.py b/tests/tui/test_app.py
index e6244d87cf2..22d1eed160a 100644
--- a/tests/tui/test_app.py
+++ b/tests/tui/test_app.py
@@ -31,7 +31,204 @@ def test_on_mouse_move_linux(tui_instance):
     """
     Test that on_mouse_move does not stop the event on Linux.
     """
-    with patch("platform.system", return_value="Linux"):
+    with patch("cecli.tui.app.IS_WINDOWS", False):
         mock_event = MagicMock(spec=events.MouseMove)
         tui_instance.on_mouse_move(mock_event)
         mock_event.stop.assert_not_called()
+
+
+
+
+def test_handle_output_message_spinner_with_agent_name(tui_instance, monkeypatch):
+    """
+    Test that spinner status messages display the agent name prefix
+    when a sub-agent is active.
+    """
+    # Mock query_one to return mock widgets for all lookup types
+    mock_footer = MagicMock()
+    mock_footer.spinner_suffix = ""
+    mock_status_bar = MagicMock()
+    mock_input_area = MagicMock()
+    mock_input_container = MagicMock()
+    mock_output_container = MagicMock()
+
+    def mock_query_one(selector, *args):
+        # query_one may be called with class or string selector
+        if isinstance(selector, type):
+            name = selector.__name__
+        else:
+            # String selector - could be CSS like "#input, InputArea"
+            if "," in selector or "#" in selector:
+                return mock_input_area
+            name = "MainFooter"  # Default fallback for footer lookup
+
+        mapping = {
+            "MainFooter": mock_footer,
+            "StatusBar": mock_status_bar,
+            "InputContainer": mock_input_container,
+            "InputArea": mock_input_area,
+            "OutputContainer": mock_output_container,
+        }
+        return mapping.get(name, mock_footer)
+
+    tui_instance.query_one = mock_query_one
+
+    # Mock coder worker for agent service lookups
+    mock_coder = MagicMock()
+    mock_coder.uuid = "primary_uuid"
+    tui_instance.worker = MagicMock()
+    tui_instance.worker.coder = mock_coder
+
+    # Mock AgentService so _resolve_agent_name works
+    mock_agent_service = MagicMock()
+    mock_agent_info = MagicMock()
+    mock_agent_info.name = "researcher"
+    mock_agent_info.coder = MagicMock()
+    mock_agent_info.coder.uuid = "some_uuid"
+    mock_agent_service.sub_agents = {"some_uuid": mock_agent_info}
+    mock_agent_service.coder = mock_coder
+
+    monkeypatch.setattr(
+        "cecli.helpers.agents.service.AgentService.get_instance",
+        lambda *args: mock_agent_service,
+    )
+
+    # Test: sub-agent spinner should include agent_name="researcher"
+    msg = {
+        "type": "spinner", "action": "start", "text": "Thinking...",
+        "coder_uuid": "some_uuid",
+    }
+    tui_instance.handle_output_message(msg)
+    mock_footer.start_spinner.assert_called_once_with(
+        "Thinking...", agent_name="researcher"
+    )
+
+    # Test: primary agent spinner should have agent_name=None
+    mock_footer.reset_mock()
+    msg["coder_uuid"] = "primary_uuid"
+    tui_instance.handle_output_message(msg)
+    mock_footer.start_spinner.assert_called_once_with(
+        "Thinking...", agent_name=None
+    )
+
+def test_handle_output_message_confirmation_with_agent_name(tui_instance, monkeypatch):
+    """
+    Test that confirmation status messages display the agent name prefix.
+    """
+    mock_footer = MagicMock()
+    mock_footer.spinner_suffix = ""
+    mock_status_bar = MagicMock()
+    mock_input_area = MagicMock()
+    mock_input_container = MagicMock()
+    mock_output_container = MagicMock()
+
+    def mock_query_one(selector, *args):
+        if isinstance(selector, type):
+            name = selector.__name__
+        else:
+            if selector == "#input" or selector == "#input, InputArea":
+                return mock_input_area
+            elif selector == "#status-bar" or selector == "#status-bar, StatusBar":
+                return mock_status_bar
+            name = "MainFooter"  # Default fallback
+
+        mapping = {
+            "MainFooter": mock_footer,
+            "StatusBar": mock_status_bar,
+            "InputContainer": mock_input_container,
+            "InputArea": mock_input_area,
+            "OutputContainer": mock_output_container,
+        }
+        return mapping.get(name, mock_footer)
+
+    tui_instance.query_one = mock_query_one
+
+    # Mock coder worker for agent service lookups
+    mock_coder = MagicMock()
+    mock_coder.uuid = "primary_uuid"
+    tui_instance.worker = MagicMock()
+    tui_instance.worker.coder = mock_coder
+
+    # Stub status_bar reference
+    tui_instance.status_bar = mock_status_bar
+
+    # Mock AgentService
+    mock_agent_service = MagicMock()
+    mock_agent_info = MagicMock()
+    mock_agent_info.name = "researcher"
+    mock_agent_info.coder = MagicMock()
+    mock_agent_info.coder.uuid = "some_uuid"
+    mock_agent_service.sub_agents = {"some_uuid": mock_agent_info}
+    mock_agent_service.coder = mock_coder
+
+    monkeypatch.setattr(
+        "cecli.helpers.agents.service.AgentService.get_instance",
+        lambda *args: mock_agent_service,
+    )
+
+    # Test: sub-agent confirmation should include agent_name="researcher"
+    msg = {
+        "type": "confirmation", "question": "Are you sure?",
+        "options": {}, "coder_uuid": "some_uuid",
+    }
+    tui_instance.handle_output_message(msg)
+    mock_status_bar.show_confirm.assert_called_once_with(
+        "Are you sure?", show_all=False, allow_tweak=False,
+        allow_never=False, default="y",
+        explicit_yes_required=False, agent_name="researcher",
+    )
+
+def test_handle_output_message_error_with_agent_name(tui_instance, monkeypatch):
+    """
+    Test that error status messages display the agent name prefix.
+    """
+    mock_footer = MagicMock()
+    mock_footer.spinner_suffix = ""
+    mock_status_bar = MagicMock()
+    mock_input_area = MagicMock()
+    mock_input_container = MagicMock()
+    mock_output_container = MagicMock()
+
+    def mock_query_one(selector, *args):
+        if isinstance(selector, type):
+            name = selector.__name__
+        else:
+            if "," in selector or "#" in selector:
+                return mock_input_area
+            return mock_footer
+        mapping = {
+            "MainFooter": mock_footer,
+            "StatusBar": mock_status_bar,
+            "InputContainer": mock_input_container,
+            "InputArea": mock_input_area,
+            "OutputContainer": mock_output_container,
+        }
+        return mapping.get(name, mock_footer)
+
+    tui_instance.query_one = mock_query_one
+
+    # Mock coder worker for agent service lookups
+    mock_coder = MagicMock()
+    mock_coder.uuid = "primary_uuid"
+    tui_instance.worker = MagicMock()
+    tui_instance.worker.coder = mock_coder
+
+    # Stub status_bar reference
+    tui_instance.status_bar = mock_status_bar
+
+    # Mock AgentService - unknown UUID should return None (no prefix)
+    monkeypatch.setattr(
+        "cecli.helpers.agents.service.AgentService.get_instance",
+        lambda *args: MagicMock(sub_agents={}, coder=mock_coder),
+    )
+
+    # Test: error message for unknown agent should have agent_name=None
+    msg = {
+        "type": "error", "message": "Something went wrong!",
+        "coder_uuid": "unknown_uuid",
+    }
+    tui_instance.handle_output_message(msg)
+    mock_status_bar.show_notification.assert_called_once_with(
+        "Something went wrong!", severity="error", timeout=5,
+        agent_name=None,
+    )

From 8aa0c9298f71b541dc85e8692e26964d65d22436 Mon Sep 17 00:00:00 2001
From: Your Name <you@example.com>
Date: Fri, 29 May 2026 12:41:15 -0700
Subject: [PATCH 12/37] feat: Add agent name prefixes to TUI status messages

Co-authored-by: cecli (openai/nvidia_nim/deepseek-ai/deepseek-v4-pro)
---
 cecli/tui/app.py                |  3 ++-
 cecli/tui/widgets/footer.py     | 10 ++++------
 cecli/tui/widgets/status_bar.py |  2 ++
 3 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/cecli/tui/app.py b/cecli/tui/app.py
index 15c2a12252a..730b35d9a74 100644
--- a/cecli/tui/app.py
+++ b/cecli/tui/app.py
@@ -583,7 +583,8 @@ def _resolve_agent_name(self, coder_uuid: str | None) -> str | None:
             for info in agent_service.sub_agents.values():
                 if str(info.coder.uuid) == coder_uuid:
                     return info.name
-        except Exception:
+        except (AttributeError, ImportError, KeyError):
+            # Agent service not available or coder not yet initialized
             pass
         return None
     def add_output(self, text, task_id=None):
diff --git a/cecli/tui/widgets/footer.py b/cecli/tui/widgets/footer.py
index 52523963b53..c80739c5ca0 100644
--- a/cecli/tui/widgets/footer.py
+++ b/cecli/tui/widgets/footer.py
@@ -100,11 +100,10 @@ def render(self) -> Text:
         if self.spinner_visible:
             spinner_char = self._spinner_chars[self._spinner_frame]
             left.append(f"{spinner_char} ")
-            if self.spinner_text:
-                left.append(self.spinner_text)
             if self.agent_name:
                 left.append(f"({self.agent_name}) ")
-
+            if self.spinner_text:
+                left.append(self.spinner_text)
             # When a sub-agent is generating, show its model alongside the spinner
             # if self._has_running_sub_agent():
             #     model_display = self._get_display_model()
@@ -180,9 +179,8 @@ def update_mode(self, mode: str):
         self.coder_mode = mode
         self.refresh()
 
-    def start_spinner(self, text: str = ""):
-        """Show spinner with optional text."""
     def start_spinner(self, text: str = "", agent_name: str = ""):
+        """Show spinner with optional text."""
         self.spinner_text = text
         self.agent_name = agent_name
         self.spinner_visible = True
@@ -210,8 +208,8 @@ def stop_spinner(self):
 
         self.spinner_visible = False
         self.spinner_text = ""
+        self.agent_name = ""
         self.refresh()
-
     def _has_running_sub_agent(self) -> bool:
         """Check if any agent is currently generating output."""
         try:
diff --git a/cecli/tui/widgets/status_bar.py b/cecli/tui/widgets/status_bar.py
index 66925df3176..708a3467965 100644
--- a/cecli/tui/widgets/status_bar.py
+++ b/cecli/tui/widgets/status_bar.py
@@ -180,6 +180,7 @@ def show_notification(
             text: Message to display
             severity: One of "info", "warning", "error", "success"
             timeout: Auto-dismiss after this many seconds (None = no auto-dismiss)
+            agent_name: Optional agent name to prefix the message with
         """
         # Cancel any existing timer
         if self._timer:
@@ -214,6 +215,7 @@ def show_confirm(
             allow_never: Whether to show "don't ask again" option
             default: Default response ("y" or "n")
             explicit_yes_required: Whether explicit yes is required
+            agent_name: Optional agent name to prefix the question with
         """
         # Cancel any existing timer
         if self._timer:

From 2baaf4e22628663acdf20468ad5d4adb7e6aef67 Mon Sep 17 00:00:00 2001
From: Your Name <you@example.com>
Date: Fri, 29 May 2026 13:59:31 -0700
Subject: [PATCH 13/37] fix: Pass coder_uuid to spinner start calls

Co-authored-by: cecli (openai/agentic)
---
 cecli/coders/base_coder.py |  8 +++-----
 cecli/tui/app.py           | 13 ++++++++-----
 2 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/cecli/coders/base_coder.py b/cecli/coders/base_coder.py
index 2b2fbdb40be..6b189419cc6 100755
--- a/cecli/coders/base_coder.py
+++ b/cecli/coders/base_coder.py
@@ -1597,7 +1597,7 @@ async def output_task(self, preproc):
                     self.io.output_task = asyncio.create_task(self.generate(user_message, preproc))
 
                     # Start spinner for output task
-                    self.io.start_spinner("Processing...")
+                    self.io.start_spinner("Processing...", coder_uuid=getattr(self, 'uuid', None))
                     await self.io.recreate_input()
 
                 # Monitor output task
@@ -2365,7 +2365,7 @@ async def format_in_executor():
             if not self.tui:
                 spinner_text += f" • ${self.format_cost(self.total_cost)} session"
 
-            self.io.start_spinner(spinner_text)
+            self.io.start_spinner(spinner_text, coder_uuid=getattr(self, 'uuid', None))
             if self.stream:
                 self.mdstream = True
             else:
@@ -2452,9 +2452,7 @@ async def format_in_executor():
             self.mdstream = None
 
             # Ensure any waiting spinner is stopped
-            self.io.start_spinner("Processing Answer...")
-
-            self.partial_response_content = self.get_multi_response_content_in_progress(True)
+            self.io.start_spinner("Processing Answer...", coder_uuid=getattr(self, 'uuid', None))
             self.remove_reasoning_content()
             self.multi_response_content = ""
 
diff --git a/cecli/tui/app.py b/cecli/tui/app.py
index 730b35d9a74..ccbd3162a0c 100644
--- a/cecli/tui/app.py
+++ b/cecli/tui/app.py
@@ -807,15 +807,18 @@ def on_input_area_submit(self, message: InputArea.Submit):
 
         # Update footer to show processing
         footer = self.query_one(MainFooter)
-        footer.start_spinner("Processing...")
-
+        
         coder = self.worker.coder
+        # Determine which coder is in the foreground for input routing
+        foreground_coder = AgentService.get_instance(coder).foreground_coder
+        coder_uuid = str(foreground_coder.uuid) if foreground_coder and hasattr(foreground_coder, "uuid") else None
+        agent_name = self._resolve_agent_name(coder_uuid)
+
+        footer.start_spinner("Processing...", agent_name=agent_name or "")
 
         if coder:
-            coder.io.start_spinner("Processing...")
+            coder.io.start_spinner("Processing...", coder_uuid=coder_uuid)
 
-        # Determine which coder is in the foreground for input routing
-        foreground_coder = AgentService.get_instance(coder).foreground_coder
 
         if coder and is_active(getattr(coder.io, "output_task", None)):
             from cecli.helpers.conversation import ConversationService, MessageTag

From baffb29cadcfde721f9735cb691eefefc3d3de64 Mon Sep 17 00:00:00 2001
From: Your Name <you@example.com>
Date: Fri, 29 May 2026 14:17:41 -0700
Subject: [PATCH 14/37] fix: Prefix primary agent status when sub-agents exist

Co-authored-by: cecli (openai/agentic)
---
 cecli/tui/app.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/cecli/tui/app.py b/cecli/tui/app.py
index ccbd3162a0c..e065fb90449 100644
--- a/cecli/tui/app.py
+++ b/cecli/tui/app.py
@@ -579,6 +579,8 @@ def _resolve_agent_name(self, coder_uuid: str | None) -> str | None:
             agent_service = AgentService.get_instance(self.worker.coder)
             primary_uuid = str(agent_service.coder.uuid)
             if coder_uuid == primary_uuid:
+                if agent_service.sub_agents:
+                    return "primary"
                 return None  # Primary agent gets no prefix
             for info in agent_service.sub_agents.values():
                 if str(info.coder.uuid) == coder_uuid:

From 79e99db640683816c55aeac9da658ddde11072cb Mon Sep 17 00:00:00 2001
From: Your Name <you@example.com>
Date: Fri, 29 May 2026 15:38:34 -0700
Subject: [PATCH 15/37] fix: Add UUID disambiguation for duplicate agent names

Co-authored-by: cecli (openai/agentic)
---
 cecli/tui/app.py | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/cecli/tui/app.py b/cecli/tui/app.py
index e065fb90449..6a1f6a0b041 100644
--- a/cecli/tui/app.py
+++ b/cecli/tui/app.py
@@ -569,7 +569,11 @@ def _resolve_agent_name(self, coder_uuid: str | None) -> str | None:
         """Resolve an agent display name from a coder_uuid.
 
         Returns the sub-agent's name if the coder_uuid belongs to a known
-        sub-agent, otherwise None (primary agent uses no prefix).
+        sub-agent. For the primary agent, returns "primary" if sub-agents
+        exist, otherwise None.
+
+        If multiple sub-agents share the same name, disambiguates by
+        appending the first 3 characters of the UUID in parentheses.
         """
         if not coder_uuid:
             return None
@@ -584,6 +588,15 @@ def _resolve_agent_name(self, coder_uuid: str | None) -> str | None:
                 return None  # Primary agent gets no prefix
             for info in agent_service.sub_agents.values():
                 if str(info.coder.uuid) == coder_uuid:
+                    # Check for duplicate names among sub-agents
+                    name_count = sum(
+                        1 for i in agent_service.sub_agents.values()
+                        if i.name == info.name
+                    )
+                    if name_count > 1:
+                        # Disambiguate with first 3 UUID characters
+                        short_uuid = str(info.coder.uuid)[:3]
+                        return f"{info.name} ({short_uuid})"
                     return info.name
         except (AttributeError, ImportError, KeyError):
             # Agent service not available or coder not yet initialized
@@ -809,7 +822,7 @@ def on_input_area_submit(self, message: InputArea.Submit):
 
         # Update footer to show processing
         footer = self.query_one(MainFooter)
-        
+
         coder = self.worker.coder
         # Determine which coder is in the foreground for input routing
         foreground_coder = AgentService.get_instance(coder).foreground_coder

From 3929bfb2f972938dc3a4837674cc83d6d9f064c2 Mon Sep 17 00:00:00 2001
From: Your Name <you@example.com>
Date: Fri, 29 May 2026 17:34:06 -0700
Subject: [PATCH 16/37] refactor: Add coder_uuid to TUI messages and tests

---
 cecli/tui/io.py       | 11 +++--------
 cecli/utils.py        |  3 +--
 tests/tui/test_app.py | 39 +++++++++++++++++++++++----------------
 3 files changed, 27 insertions(+), 26 deletions(-)

diff --git a/cecli/tui/io.py b/cecli/tui/io.py
index ff20b443de0..36dcb6a543f 100644
--- a/cecli/tui/io.py
+++ b/cecli/tui/io.py
@@ -408,14 +408,7 @@ def stop_spinner(self, **kwargs):
         super().stop_spinner()
 
         # Send to TUI
-        self.output_queue.put(
-            {
-                "type": "spinner",
-                "action": "stop",
-            }
-                "coder_uuid": coder_uuid,
-                "coder_uuid": coder_uuid,
-        )
+        self.output_queue.put({"type": "spinner", "action": "stop", "coder_uuid": coder_uuid})
 
     def interrupt_input(self):
         self.interrupted = True
@@ -531,6 +524,7 @@ async def confirm_ask(
         allow_never=False,
         allow_tweak=False,
         acknowledge=False,
+        coder_uuid=None,
     ):
         """Override confirm_ask to show modal instead of inline prompt.
 
@@ -607,6 +601,7 @@ async def confirm_ask(
                             "acknowledge": acknowledge,
                             "valid_responses": valid_responses,
                         },
+                        "coder_uuid": coder_uuid,
                     }
                 )
 
diff --git a/cecli/utils.py b/cecli/utils.py
index ebb928ad91b..b8a009c07eb 100644
--- a/cecli/utils.py
+++ b/cecli/utils.py
@@ -2,14 +2,13 @@
 import json
 import os
 import platform
+import shlex
 import shutil
 import subprocess
 import sys
 import tempfile
 from pathlib import Path
 
-import shlex
-
 from cecli.dump import dump  # noqa: F401
 from cecli.waiting import Spinner
 
diff --git a/tests/tui/test_app.py b/tests/tui/test_app.py
index 22d1eed160a..5d008b93ad3 100644
--- a/tests/tui/test_app.py
+++ b/tests/tui/test_app.py
@@ -37,8 +37,6 @@ def test_on_mouse_move_linux(tui_instance):
         mock_event.stop.assert_not_called()
 
 
-
-
 def test_handle_output_message_spinner_with_agent_name(tui_instance, monkeypatch):
     """
     Test that spinner status messages display the agent name prefix
@@ -95,21 +93,20 @@ def mock_query_one(selector, *args):
 
     # Test: sub-agent spinner should include agent_name="researcher"
     msg = {
-        "type": "spinner", "action": "start", "text": "Thinking...",
+        "type": "spinner",
+        "action": "start",
+        "text": "Thinking...",
         "coder_uuid": "some_uuid",
     }
     tui_instance.handle_output_message(msg)
-    mock_footer.start_spinner.assert_called_once_with(
-        "Thinking...", agent_name="researcher"
-    )
+    mock_footer.start_spinner.assert_called_once_with("Thinking...", agent_name="researcher")
 
     # Test: primary agent spinner should have agent_name=None
     mock_footer.reset_mock()
     msg["coder_uuid"] = "primary_uuid"
     tui_instance.handle_output_message(msg)
-    mock_footer.start_spinner.assert_called_once_with(
-        "Thinking...", agent_name=None
-    )
+    mock_footer.start_spinner.assert_called_once_with("Thinking...", agent_name=None)
+
 
 def test_handle_output_message_confirmation_with_agent_name(tui_instance, monkeypatch):
     """
@@ -168,16 +165,23 @@ def mock_query_one(selector, *args):
 
     # Test: sub-agent confirmation should include agent_name="researcher"
     msg = {
-        "type": "confirmation", "question": "Are you sure?",
-        "options": {}, "coder_uuid": "some_uuid",
+        "type": "confirmation",
+        "question": "Are you sure?",
+        "options": {},
+        "coder_uuid": "some_uuid",
     }
     tui_instance.handle_output_message(msg)
     mock_status_bar.show_confirm.assert_called_once_with(
-        "Are you sure?", show_all=False, allow_tweak=False,
-        allow_never=False, default="y",
-        explicit_yes_required=False, agent_name="researcher",
+        "Are you sure?",
+        show_all=False,
+        allow_tweak=False,
+        allow_never=False,
+        default="y",
+        explicit_yes_required=False,
+        agent_name="researcher",
     )
 
+
 def test_handle_output_message_error_with_agent_name(tui_instance, monkeypatch):
     """
     Test that error status messages display the agent name prefix.
@@ -224,11 +228,14 @@ def mock_query_one(selector, *args):
 
     # Test: error message for unknown agent should have agent_name=None
     msg = {
-        "type": "error", "message": "Something went wrong!",
+        "type": "error",
+        "message": "Something went wrong!",
         "coder_uuid": "unknown_uuid",
     }
     tui_instance.handle_output_message(msg)
     mock_status_bar.show_notification.assert_called_once_with(
-        "Something went wrong!", severity="error", timeout=5,
+        "Something went wrong!",
+        severity="error",
+        timeout=5,
         agent_name=None,
     )

From 5c6936f1f0e436a244ce2bb746ed8564145e9831 Mon Sep 17 00:00:00 2001
From: Your Name <you@example.com>
Date: Fri, 29 May 2026 17:36:46 -0700
Subject: [PATCH 17/37] cli-39: fixed linting

---
 cecli/coders/base_coder.py      |  6 +++---
 cecli/tui/app.py                | 12 +++++++-----
 cecli/tui/widgets/footer.py     |  1 +
 cecli/tui/widgets/status_bar.py |  6 +++++-
 4 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/cecli/coders/base_coder.py b/cecli/coders/base_coder.py
index 6b189419cc6..ff3dd190ea5 100755
--- a/cecli/coders/base_coder.py
+++ b/cecli/coders/base_coder.py
@@ -1597,7 +1597,7 @@ async def output_task(self, preproc):
                     self.io.output_task = asyncio.create_task(self.generate(user_message, preproc))
 
                     # Start spinner for output task
-                    self.io.start_spinner("Processing...", coder_uuid=getattr(self, 'uuid', None))
+                    self.io.start_spinner("Processing...", coder_uuid=getattr(self, "uuid", None))
                     await self.io.recreate_input()
 
                 # Monitor output task
@@ -2365,7 +2365,7 @@ async def format_in_executor():
             if not self.tui:
                 spinner_text += f" • ${self.format_cost(self.total_cost)} session"
 
-            self.io.start_spinner(spinner_text, coder_uuid=getattr(self, 'uuid', None))
+            self.io.start_spinner(spinner_text, coder_uuid=getattr(self, "uuid", None))
             if self.stream:
                 self.mdstream = True
             else:
@@ -2452,7 +2452,7 @@ async def format_in_executor():
             self.mdstream = None
 
             # Ensure any waiting spinner is stopped
-            self.io.start_spinner("Processing Answer...", coder_uuid=getattr(self, 'uuid', None))
+            self.io.start_spinner("Processing Answer...", coder_uuid=getattr(self, "uuid", None))
             self.remove_reasoning_content()
             self.multi_response_content = ""
 
diff --git a/cecli/tui/app.py b/cecli/tui/app.py
index 6a1f6a0b041..35dca51b64e 100644
--- a/cecli/tui/app.py
+++ b/cecli/tui/app.py
@@ -564,7 +564,6 @@ def handle_output_message(self, msg):
             else:
                 self._switch_to_container(target_uuid)
 
-
     def _resolve_agent_name(self, coder_uuid: str | None) -> str | None:
         """Resolve an agent display name from a coder_uuid.
 
@@ -590,8 +589,7 @@ def _resolve_agent_name(self, coder_uuid: str | None) -> str | None:
                 if str(info.coder.uuid) == coder_uuid:
                     # Check for duplicate names among sub-agents
                     name_count = sum(
-                        1 for i in agent_service.sub_agents.values()
-                        if i.name == info.name
+                        1 for i in agent_service.sub_agents.values() if i.name == info.name
                     )
                     if name_count > 1:
                         # Disambiguate with first 3 UUID characters
@@ -602,6 +600,7 @@ def _resolve_agent_name(self, coder_uuid: str | None) -> str | None:
             # Agent service not available or coder not yet initialized
             pass
         return None
+
     def add_output(self, text, task_id=None):
         """Add output to the output container."""
         output_container = self.query_one("#output", OutputContainer)
@@ -826,7 +825,11 @@ def on_input_area_submit(self, message: InputArea.Submit):
         coder = self.worker.coder
         # Determine which coder is in the foreground for input routing
         foreground_coder = AgentService.get_instance(coder).foreground_coder
-        coder_uuid = str(foreground_coder.uuid) if foreground_coder and hasattr(foreground_coder, "uuid") else None
+        coder_uuid = (
+            str(foreground_coder.uuid)
+            if foreground_coder and hasattr(foreground_coder, "uuid")
+            else None
+        )
         agent_name = self._resolve_agent_name(coder_uuid)
 
         footer.start_spinner("Processing...", agent_name=agent_name or "")
@@ -834,7 +837,6 @@ def on_input_area_submit(self, message: InputArea.Submit):
         if coder:
             coder.io.start_spinner("Processing...", coder_uuid=coder_uuid)
 
-
         if coder and is_active(getattr(coder.io, "output_task", None)):
             from cecli.helpers.conversation import ConversationService, MessageTag
 
diff --git a/cecli/tui/widgets/footer.py b/cecli/tui/widgets/footer.py
index c80739c5ca0..b0e1b0e6534 100644
--- a/cecli/tui/widgets/footer.py
+++ b/cecli/tui/widgets/footer.py
@@ -210,6 +210,7 @@ def stop_spinner(self):
         self.spinner_text = ""
         self.agent_name = ""
         self.refresh()
+
     def _has_running_sub_agent(self) -> bool:
         """Check if any agent is currently generating output."""
         try:
diff --git a/cecli/tui/widgets/status_bar.py b/cecli/tui/widgets/status_bar.py
index 708a3467965..b198a30cb27 100644
--- a/cecli/tui/widgets/status_bar.py
+++ b/cecli/tui/widgets/status_bar.py
@@ -134,6 +134,7 @@ def __init__(self, **kwargs):
         self._default = "y"
         self._explicit_yes_required = False
         self._timer = None
+
     def compose(self) -> ComposeResult:
         """Create empty container - content added dynamically."""
         yield Horizontal(classes="status-content")
@@ -171,7 +172,10 @@ def _rebuild_content(self) -> None:
                 hints.mount(Static("\\[d]on't ask again", classes="hint hint-never"))
 
     def show_notification(
-        self, text: str, severity: str = "info", timeout: float | None = 3.0,
+        self,
+        text: str,
+        severity: str = "info",
+        timeout: float | None = 3.0,
         agent_name: str | None = None,
     ) -> None:
         """Show a transient notification message.

From 4c7aba03511757111bbeae08ba3177da69d5a37f Mon Sep 17 00:00:00 2001
From: Your Name <you@example.com>
Date: Fri, 29 May 2026 19:31:40 -0700
Subject: [PATCH 18/37] fix: Uncomment conversation promotion and ensure
 agent_name is string

Co-authored-by: cecli (openai/gemini_cli_local/gemini-2.5-pro)
---
 cecli/coders/base_coder.py | 8 ++++----
 cecli/tui/app.py           | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/cecli/coders/base_coder.py b/cecli/coders/base_coder.py
index ff3dd190ea5..b8e701a35d8 100755
--- a/cecli/coders/base_coder.py
+++ b/cecli/coders/base_coder.py
@@ -2921,8 +2921,8 @@ async def process_tool_calls(self, tool_call_response):
                     message_dict=tool_response,
                     tag=MessageTag.CUR,
                     hash_key=(tool_response["tool_call_id"], str(time.monotonic_ns())),
-                    # promotion=ConversationService.get_manager(self).DEFAULT_TAG_PROMOTION_VALUE,
-                    # mark_for_demotion=1,
+                    promotion=ConversationService.get_manager(self).DEFAULT_TAG_PROMOTION_VALUE,
+                    mark_for_demotion=1,
                 )
 
         return bool(tool_responses)
@@ -3135,8 +3135,8 @@ async def add_assistant_reply_to_cur_messages(self):
                 message_dict=msg,
                 tag=MessageTag.CUR,
                 hash_key=("assistant_message", str(msg), str(time.monotonic_ns())),
-                # promotion=ConversationService.get_manager(self).DEFAULT_TAG_PROMOTION_VALUE,
-                # mark_for_demotion=1,
+                promotion=ConversationService.get_manager(self).DEFAULT_TAG_PROMOTION_VALUE,
+                mark_for_demotion=1,
             )
 
     def get_file_mentions(self, content, ignore_current=False):
diff --git a/cecli/tui/app.py b/cecli/tui/app.py
index 35dca51b64e..86ac5090d7b 100644
--- a/cecli/tui/app.py
+++ b/cecli/tui/app.py
@@ -702,7 +702,7 @@ def update_spinner(self, msg, agent_name: str | None = None):
         action = msg.get("action", "start")
 
         if action == "start":
-            footer.start_spinner(msg.get("text", ""), agent_name=agent_name)
+            footer.start_spinner(msg.get("text", ""), agent_name=agent_name or "")
         elif action == "update":
             footer.spinner_text = msg.get("text", "")
         elif action == "update_suffix":

From 5d59ec163168b2f8b99179ce0ab52a165ca4af2c Mon Sep 17 00:00:00 2001
From: Your Name <you@example.com>
Date: Fri, 29 May 2026 19:44:51 -0700
Subject: [PATCH 19/37] fix: Improve footer widget robustness in test
 environments

Co-authored-by: cecli (openai/gemini_cli_local/gemini-2.5-pro)
---
 cecli/tui/widgets/footer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cecli/tui/widgets/footer.py b/cecli/tui/widgets/footer.py
index b0e1b0e6534..5f77cdae230 100644
--- a/cecli/tui/widgets/footer.py
+++ b/cecli/tui/widgets/footer.py
@@ -79,7 +79,7 @@ def _get_display_model(self) -> str:
             else:
                 name = coder.get_active_model().name
         except Exception:
-            name = self.app.worker.coder.get_active_model().name
+            name = self.model_name
 
         # Strip common prefixes like "openrouter/x-ai/"
         if len(name) > 40:

From 9b6f1eebc88449ee885b22f1b92dbcd0710c6135 Mon Sep 17 00:00:00 2001
From: Your Name <you@example.com>
Date: Fri, 29 May 2026 20:04:12 -0700
Subject: [PATCH 20/37] fix: Improve agent name resolution and conversation
 history handling

Co-authored-by: cecli (openai/gemini_cli_local/gemini-2.5-pro)
---
 cecli/coders/base_coder.py |  8 ++++----
 cecli/tui/app.py           | 12 +++++++++++-
 2 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/cecli/coders/base_coder.py b/cecli/coders/base_coder.py
index b8e701a35d8..ff3dd190ea5 100755
--- a/cecli/coders/base_coder.py
+++ b/cecli/coders/base_coder.py
@@ -2921,8 +2921,8 @@ async def process_tool_calls(self, tool_call_response):
                     message_dict=tool_response,
                     tag=MessageTag.CUR,
                     hash_key=(tool_response["tool_call_id"], str(time.monotonic_ns())),
-                    promotion=ConversationService.get_manager(self).DEFAULT_TAG_PROMOTION_VALUE,
-                    mark_for_demotion=1,
+                    # promotion=ConversationService.get_manager(self).DEFAULT_TAG_PROMOTION_VALUE,
+                    # mark_for_demotion=1,
                 )
 
         return bool(tool_responses)
@@ -3135,8 +3135,8 @@ async def add_assistant_reply_to_cur_messages(self):
                 message_dict=msg,
                 tag=MessageTag.CUR,
                 hash_key=("assistant_message", str(msg), str(time.monotonic_ns())),
-                promotion=ConversationService.get_manager(self).DEFAULT_TAG_PROMOTION_VALUE,
-                mark_for_demotion=1,
+                # promotion=ConversationService.get_manager(self).DEFAULT_TAG_PROMOTION_VALUE,
+                # mark_for_demotion=1,
             )
 
     def get_file_mentions(self, content, ignore_current=False):
diff --git a/cecli/tui/app.py b/cecli/tui/app.py
index 86ac5090d7b..9d151bff074 100644
--- a/cecli/tui/app.py
+++ b/cecli/tui/app.py
@@ -577,19 +577,29 @@ def _resolve_agent_name(self, coder_uuid: str | None) -> str | None:
         if not coder_uuid:
             return None
         try:
+            if not self.worker or not self.worker.coder:
+                return None  # Cannot resolve without a coder
             from cecli.helpers.agents.service import AgentService
 
             agent_service = AgentService.get_instance(self.worker.coder)
+            if not agent_service:
+                return None
             primary_uuid = str(agent_service.coder.uuid)
             if coder_uuid == primary_uuid:
                 if agent_service.sub_agents:
                     return "primary"
                 return None  # Primary agent gets no prefix
+            if not agent_service.sub_agents:
+                return None
             for info in agent_service.sub_agents.values():
+                if not info or not info.coder:
+                    continue
                 if str(info.coder.uuid) == coder_uuid:
                     # Check for duplicate names among sub-agents
                     name_count = sum(
-                        1 for i in agent_service.sub_agents.values() if i.name == info.name
+                        1
+                        for i in agent_service.sub_agents.values()
+                        if i and hasattr(i, "name") and i.name == info.name
                     )
                     if name_count > 1:
                         # Disambiguate with first 3 UUID characters

From 6d899523c5e6b78be5512c93b50dc04aa2d0a976 Mon Sep 17 00:00:00 2001
From: Your Name <you@example.com>
Date: Sat, 30 May 2026 12:46:59 -0400
Subject: [PATCH 21/37] - Make `Delegate` tool non-blocking - Change `Finished`
 tool to `Yield` tool

---
 cecli/coders/agent_coder.py               |  47 ++++++-
 cecli/helpers/agents/service.py           | 143 ++++++++++++++++++--
 cecli/helpers/conversation/integration.py |  30 +++++
 cecli/prompts/agent.yml                   |   2 +-
 cecli/prompts/subagent.yml                |   7 +-
 cecli/tools/__init__.py                   |   4 +-
 cecli/tools/_yield.py                     | 153 ++++++++++++++++++++++
 cecli/tools/delegate.py                   |  43 +++---
 cecli/tools/finished.py                   |  86 ------------
 cecli/tools/utils/registry.py             |   2 +-
 cecli/website/docs/config/agent-mode.md   |   6 +-
 cecli/website/docs/config/subagents.md    |   2 +-
 tests/subagents/test_commands.py          | 109 +--------------
 tests/subagents/test_delegate.py          |  58 +++++---
 tests/subagents/test_finished.py          |  21 ++-
 tests/subagents/test_service.py           |  67 ++++++----
 tests/tools/test_registry.py              |  16 +--
 17 files changed, 486 insertions(+), 310 deletions(-)
 create mode 100644 cecli/tools/_yield.py
 delete mode 100644 cecli/tools/finished.py

diff --git a/cecli/coders/agent_coder.py b/cecli/coders/agent_coder.py
index 9c5e5816a03..822e640d0b5 100644
--- a/cecli/coders/agent_coder.py
+++ b/cecli/coders/agent_coder.py
@@ -540,6 +540,10 @@ def format_chat_chunks(self):
 
         # Add post-message context blocks (priority 250 - between CUR and REMINDER)
         ConversationService.get_chunks(self).add_post_message_context_blocks()
+
+        # Add sub-agent states context block (same priority as post-message blocks)
+        ConversationService.get_chunks(self).add_sub_agent_states()
+
         ConversationService.get_chunks(self).add_randomized_cta()
 
         return ConversationService.get_manager(self).get_messages_dict()
@@ -938,7 +942,7 @@ async def reply_completed(self):
         if self.tool_call_vectors:
             if content and not tool_calls_found and self.num_reflections < self.max_reflections:
                 self.reflected_message = (
-                    "Continue with your task. If you have completed it, call the `Finished` tool."
+                    "Continue with your task. If you have completed it, call the `Yield` tool."
                 )
                 return True
 
@@ -1490,12 +1494,53 @@ def get_sub_agents_context(self):
                 result += "\n"
 
             result += "Use the `Delegate` tool with the sub-agent name to delegate tasks.\n"
+            result += "Use the `Yield` tool to wait for responses for all active sub agents.\n"
             result += "</context>"
             return result
         except Exception as e:
             self.io.tool_error(f"Error generating sub-agents context: {str(e)}")
             return None
 
+    def get_child_agent_states(self):
+        """Get the state of all active child sub-agents.
+
+        Returns a formatted context block with each child sub-agent's name,
+        UUID, and current status, or None if no children exist.
+        This is used by ConversationChunks.add_sub_agent_states() to provide
+        the model with visibility into active sub-agent states.
+        """
+        if not self.use_enhanced_context:
+            return None
+
+        # Sub-agents should only see child states when nested delegation is enabled
+        if hasattr(self, "parent_uuid") and self.parent_uuid:
+            if not self.agent_config.get("allow_nested_delegation", False):
+                return None
+
+        try:
+            service = AgentService.get_instance(self)
+            children = service.get_children(self)
+
+            if not children:
+                return None
+
+            result = '<context name="sub_agent_states" from="agent">\n'
+            result += "## Active Sub-Agent States\n\n"
+            result += f"Found {len(children)} active child sub-agent(s):\n\n"
+
+            for info in children:
+                result += f"**{info.name}**:\n"
+                result += f"  - UUID: `{info.coder.uuid}`\n"
+                result += f"  - Status: {info.status.value}\n"
+                if info.error:
+                    result += f"  - Error: {info.error}\n"
+                result += "\n"
+            result += "</context>"
+            return result
+        except Exception as e:
+            self.io.tool_error(f"Error generating child agent states: {str(e)}")
+            return None
+
     def get_background_command_output(self):
         """
         Get background command output to append after the main message.
diff --git a/cecli/helpers/agents/service.py b/cecli/helpers/agents/service.py
index fc131030daa..34fe637901b 100644
--- a/cecli/helpers/agents/service.py
+++ b/cecli/helpers/agents/service.py
@@ -6,6 +6,7 @@
 
 import asyncio
 import logging
+import time
 import weakref
 from dataclasses import dataclass
 from enum import Enum
@@ -59,6 +60,10 @@ class AgentService:
     _global_registry: Dict[str, Any] = {}  # name -> SubAgentConfig (from .md files)
     # UUID -> weakref of coder instance for convenient lookup
     _uuid_coder_map: Dict[str, weakref.ref] = {}
+    # Lock pools keyed by parent UUID — created lazily so only parents that
+    # actually use them allocate a lock.
+    _spawn_locks: Dict[str, asyncio.Lock] = {}
+    _conversation_locks: Dict[str, asyncio.Lock] = {}
 
     # ------------------------------------------------------------------ #
     # Singleton
@@ -198,6 +203,13 @@ def max_sub_agents(self) -> int:
 
     # ------------------------------------------------------------------ #
     # Internal helpers
+    @classmethod
+    def _get_lock(cls, pool: Dict[str, asyncio.Lock], uuid: str) -> asyncio.Lock:
+        """Return a lock for *uuid* from *pool*, creating one if absent."""
+        if uuid not in pool:
+            pool[uuid] = asyncio.Lock()
+        return pool[uuid]
+
     @staticmethod
     def _get_tui(coder: Any) -> Any:
         """Dereference the TUI weakref from a coder, returning None if unavailable.
@@ -335,12 +347,18 @@ async def _create_sub_agent_coder(
                 f"Unknown sub-agent '{name}'. " f"Available: {list(self._global_registry.keys())}"
             )
 
-        self._check_max_sub_agents()
+        # Critical section: max-sub-agent check and registration must be atomic
+        # to prevent TOCTOU race when multiple spawns fire concurrently.
+        # Coder.create() is called *outside* the lock to avoid holding the
+        # lock across an await (which risks deadlock if Coder.create() ever
+        # tried to acquire the same lock).
+        parent_coder = parent if parent is not None else self.coder
 
-        from cecli.coders import Coder
+        async with self._get_lock(self._spawn_locks, parent_coder.uuid):
+            self._check_max_sub_agents()
+            new_uuid = str(uuid4())
 
-        parent_coder = parent if parent is not None else self.coder
-        new_uuid = str(uuid4())
+        from cecli.coders import Coder
 
         kwargs = dict(
             io=parent_coder.io,
@@ -362,18 +380,23 @@ async def _create_sub_agent_coder(
         new_coder = await Coder.create(**kwargs)
         # IOProxy wrapping is handled by base_coder.py's Coder.__init__
 
-        # Register in global coder lookup
-        self._uuid_coder_map[new_uuid] = weakref.ref(new_coder)
+        # Re-acquire the lock to register — we must re-check max agents since
+        # the lock was released and other spawns may have registered in between.
+        async with self._get_lock(self._spawn_locks, parent_coder.uuid):
+            self._check_max_sub_agents()
 
-        info = SubAgentInfo(
-            name=name,
-            coder=new_coder,
-            parent_uuid=parent_coder.uuid,
-            status=SubAgentStatus.CREATED,
-        )
+            # Register in global coder lookup
+            self._uuid_coder_map[new_uuid] = weakref.ref(new_coder)
+
+            info = SubAgentInfo(
+                name=name,
+                coder=new_coder,
+                parent_uuid=parent_coder.uuid,
+                status=SubAgentStatus.CREATED,
+            )
 
-        self.sub_agents[new_coder.uuid] = info
-        self._sub_agent_order.append(new_coder.uuid)
+            self.sub_agents[new_coder.uuid] = info
+            self._sub_agent_order.append(new_coder.uuid)
 
         # Notify TUI to create a container
         try:
@@ -432,6 +455,21 @@ def start_generate_task(self, info: SubAgentInfo, user_message: str) -> asyncio.
         for cancellation/monitoring.
 
+        .. note::
+
+            **Ordering dependency with mark_sub_agent_finished()**
+
+            ``mark_sub_agent_finished()`` (called *synchronously* inside the tool
+            execution pipeline of ``generate()``) writes ``info.status`` and
+            ``info.summary`` before ``generate()`` returns to this task.
+
+            The ``if info.status == SubAgentStatus.RUNNING:`` guard below correctly
+            prevents the task from overwriting those values with defaults.
+
+            This ordering is currently safe because tool execution is synchronous.
+            If tool execution is refactored to introduce interleaved ``await`` points,
+            this dependency would break and an ``asyncio.Event`` would be needed.
+
         Args:
             info: The SubAgentInfo for the sub-agent.
             user_message: The user message to pass to ``generate()``.
 
@@ -446,10 +484,12 @@ async def _run_generate():
                 if info.status == SubAgentStatus.RUNNING:
                     info.status = SubAgentStatus.FINISHED
                     info.summary = info.summary or DEFAULT_SUMMARY_COMPLETED
+                await self._inject_sub_agent_result(info)
             except asyncio.CancelledError:
                 info.status = SubAgentStatus.FINISHED
                 info.summary = info.summary or DEFAULT_SUMMARY_INTERRUPTED
                 logger.debug("Sub-agent %s generate cancelled (interrupted)", info.name)
+                await self._inject_sub_agent_result(info)
                 raise
             except Exception as exc:
                 info.status = SubAgentStatus.ERROR
@@ -460,6 +500,7 @@ async def _run_generate():
                     exc,
                     exc_info=True,
                 )
+                await self._inject_sub_agent_result(info)
                 raise
 
         # Cancel any previous generate task to prevent duplicate concurrent generates
@@ -468,8 +509,79 @@ async def _run_generate():
 
         task = asyncio.create_task(_run_generate())
         info.generate_task = task
+        # Suppress "Task exception was never retrieved" for fire-and-forget tasks
+        task.add_done_callback(lambda t: t.exception() if not t.cancelled() else None)
         return task
 
+    async def _inject_sub_agent_result(self, info: SubAgentInfo) -> None:
+        """Inject the sub-agent's result (summary/error) into the parent's conversation.
+
+        Adds a user message with the result content and an assistant acknowledgment
+        so the parent coder (and therefore the LLM) sees what the sub-agent produced.
+        Uses unique hash keys so multiple sub-agent completions don't overwrite each other.
+        """
+        from cecli.helpers.conversation.service import ConversationService
+        from cecli.helpers.conversation.tags import MessageTag
+
+        # Capture coder UUID early in case the sub-agent is cleaned up before
+        # this method completes (the weakref could become invalid).
+        coder_uuid = getattr(info.coder, "uuid", "(unknown)")
+
+        parent_coder_ref = self._uuid_coder_map.get(info.parent_uuid)
+        if not parent_coder_ref:
+            return
+
+        parent_coder = parent_coder_ref()
+        if not parent_coder:
+            return
+
+        if info.status == SubAgentStatus.ERROR:
+            user_content = (
+                f"The **{info.name}** agent (`{coder_uuid}`) encountered an error:\n"
+                f"{info.error}"
+            )
+            assistant_content = (
+                f"The {info.name} agent `{coder_uuid}` failed with the error above. "
+                f"You may want to review or retry the delegation."
+            )
+        elif info.status == SubAgentStatus.FINISHED:
+            is_interrupted = info.summary == DEFAULT_SUMMARY_INTERRUPTED
+            summary_text = info.summary or DEFAULT_SUMMARY_COMPLETED
+            if is_interrupted:
+                user_content = (
+                    f"The **{info.name}** agent (`{coder_uuid}`) was interrupted:\n"
+                    f"{summary_text}"
+                )
+                assistant_content = (
+                    f"The {info.name} agent `{coder_uuid}` was interrupted before completing its task. "
+                    f"You may want to review or retry the delegation."
+                )
+            else:
+                user_content = (
+                    f"The **{info.name}** agent (`{coder_uuid}`) completed with the following summary:\n"
+                    f"{summary_text}"
+                )
+                assistant_content = (
+                    f"Thank you for sharing the summary for {info.name} agent `{coder_uuid}`. "
+                    f"The agent has finished its task."
+                )
+        else:
+            return
+
+        async with self._get_lock(self._conversation_locks, info.parent_uuid):
+            ConversationService.get_manager(parent_coder).add_message(
+                message_dict={"role": "user", "content": user_content},
+                tag=MessageTag.CUR,
+                hash_key=("sub_agent_result", "user", coder_uuid, str(time.monotonic_ns())),
+                force=True,
+            )
+            ConversationService.get_manager(parent_coder).add_message(
+                message_dict={"role": "assistant", "content": assistant_content},
+                tag=MessageTag.CUR,
+                hash_key=("sub_agent_result", "assistant", coder_uuid, str(time.monotonic_ns())),
+                force=True,
+            )
+
     async def invoke(
         self, name: str, prompt: str, blocking: bool = True, parent: Any = None
     ) -> Optional[str]:
@@ -610,4 +722,7 @@ def cleanup_all_for_parent(self) -> None:
         """Clean up all sub-agents when the parent session ends."""
         for uuid in list(self.sub_agents.keys()):
             self._cleanup_sub_agent(uuid)
+        # Clean up lock pools to prevent memory leaks
+        self._spawn_locks.pop(self.coder.uuid, None)
+        self._conversation_locks.pop(self.coder.uuid, None)
         self._instances.pop(self.coder.uuid, None)
diff --git a/cecli/helpers/conversation/integration.py b/cecli/helpers/conversation/integration.py
index 3c5796c1139..f5c046728e2 100644
--- a/cecli/helpers/conversation/integration.py
+++ b/cecli/helpers/conversation/integration.py
@@ -1013,6 +1013,36 @@ def add_post_message_context_blocks(self) -> None:
                 force=True,
             )
 
+    def add_sub_agent_states(self) -> None:
+        """
+        Add sub-agent states context block to conversation (priority 250).
+
+        Sub-agent states include: name, UUID, and status (CREATED, RUNNING,
+        FINISHED, ERROR) of each active child sub-agent.
+        """
+        coder = self.get_coder()
+        if not coder:
+            return
+
+        if not hasattr(coder, "use_enhanced_context") or not coder.use_enhanced_context:
+            return
+
+        if not hasattr(coder, "get_child_agent_states"):
+            return
+
+        block = coder.get_child_agent_states()
+        if not block:
+            return
+
+        ConversationService.get_manager(coder).add_message(
+            message_dict={"role": "user", "content": block},
+            tag=MessageTag.STATIC,
+            priority=DEFAULT_TAG_PRIORITY[MessageTag.REMINDER] + 25,  # After post_message blocks
+            mark_for_delete=0,
+            hash_key=("sub_agent_states",),
+            force=True,
+        )
+
     def defer_removal(self, file_path: str):
         self._deferred_removals.add(file_path)
 
diff --git a/cecli/prompts/agent.yml b/cecli/prompts/agent.yml
index 730e5975bac..988538838a5 100644
--- a/cecli/prompts/agent.yml
+++ b/cecli/prompts/agent.yml
@@ -46,7 +46,7 @@ main_system: |
   2. **Explore**: Use discovery tools (`ExploreCode`, `Grep`, `Ls`) to research and gather understanding for you task. Modify search terms when errors are encountered.
   3. **Execute**: Mark files as editable with `ContextManager` before attempting edits.  Proactively use skills if they are available. Review diff outputs after edit to ensure the proper changes were made.
   4. **Verify & Recover**: If an edit fails or introduces linting errors, use `UndoChange` immediately.
-  5. **Finished**: Use the `Finished` tool only after verifying the solution. Briefly summarize the changes for the user.
+  5. **Yield**: Use the `Yield` tool only after verifying the solution. Briefly summarize the changes for the user.
 
   ## Todo List Management
   - Break complex goals into meaningful sub-tasks so the problem remains tractable
diff --git a/cecli/prompts/subagent.yml b/cecli/prompts/subagent.yml
index a260dc9a5f3..1339499995c 100644
--- a/cecli/prompts/subagent.yml
+++ b/cecli/prompts/subagent.yml
@@ -31,7 +31,7 @@ main_system: |
   2. **Explore**: Use discovery tools (`ExploreCode`, `Grep`, `Ls`) to research and gather understanding for you task. Modify search terms when errors are encountered.
   3. **Execute**: Mark files as editable with `ContextManager` before attempting edits.  Proactively use skills if they are available. Review diff outputs after edit to ensure the proper changes were made.
   4. **Verify & Recover**: If an edit fails or introduces linting errors, use `UndoChange` immediately.
-  5. **Finished**: Use the `Finished` tool only after verifying the solution. Briefly summarize the changes for the user.
+  5. **Yield**: Use the `Yield` tool only after verifying the solution. Briefly summarize the changes for the user.
 
   ## Todo List Management
   - Break complex goals into meaningful sub-tasks so the problem remains tractable
@@ -54,8 +54,9 @@ system_reminder: |
   - **Responses**: Reason out loud through the problem but be brief.
 
   **Finishing Up**: 
-  Be very detailed in your `Finished` tool summary in describing your task, findings, efforts and results.
-  Include all of your final response inside the "summary" text so maximum information is available to the user.
+  Be very detailed in your `Yield` tool summary in describing your task, findings, efforts and results.
+  Include all of your final response inside the "summary" text.
+  Please be verbose so as much detail is made available to the user as possible.
 
   {lazy_prompt}
   {shell_cmd_reminder}
diff --git a/cecli/tools/__init__.py b/cecli/tools/__init__.py
index 07b1754aa31..44e527cff37 100644
--- a/cecli/tools/__init__.py
+++ b/cecli/tools/__init__.py
@@ -3,13 +3,13 @@
 
 # Import all tool modules
 from . import (
+    _yield,
     command,
     command_interactive,
     context_manager,
     delegate,
     edit_text,
     explore_code,
-    finished,
     git_branch,
     git_diff,
     git_log,
@@ -34,7 +34,7 @@
     delegate,
     edit_text,
     explore_code,
-    finished,
+    _yield,
     git_branch,
     git_diff,
     git_log,
diff --git a/cecli/tools/_yield.py b/cecli/tools/_yield.py
new file mode 100644
index 00000000000..c1372c1bdca
--- /dev/null
+++ b/cecli/tools/_yield.py
@@ -0,0 +1,188 @@
+import asyncio
+import json
+import logging
+
+from cecli.tools.utils.base_tool import BaseTool
+from cecli.tools.utils.output import color_markers, tool_footer, tool_header
+
+logger = logging.getLogger(__name__)
+
+
+async def _await_child_agents(coder) -> bool:
+    """
+    Wait for the coder's still-running sub-agent tasks to complete.
+
+    The wait is raced against ``coder.interrupt_event``: when the event fires
+    first, the remaining sub-agent tasks are cancelled.
+
+    Returns:
+        True if the wait was interrupted, False if there was nothing to wait
+        for or every sub-agent task completed.
+    """
+    # Function-local import; presumably avoids an import cycle (TODO confirm).
+    from cecli.helpers.agents.service import AgentService
+
+    agent_service = AgentService.get_instance(coder)
+    children = agent_service.get_children(coder)
+    active_tasks = [
+        info.generate_task
+        for info in children
+        if info.generate_task is not None and not info.generate_task.done()
+    ]
+    if not active_tasks:
+        return False
+
+    coder.io.tool_warning(
+        f"Waiting for {len(active_tasks)} sub-agent(s) to complete before yielding..."
+    )
+
+    # Single asyncio.wait that includes both the sub-agent tasks and
+    # the interrupt event, avoiding nested asyncio.wait() calls.
+    interrupt_event = coder.interrupt_event
+    if interrupt_event is None:
+        interrupt_event = asyncio.Event()
+
+    interrupt_task = asyncio.create_task(interrupt_event.wait())
+    pending = set(active_tasks) | {interrupt_task}
+    try:
+        while any(t in pending for t in active_tasks):
+            done, pending = await asyncio.wait(
+                pending, timeout=5.0, return_when=asyncio.FIRST_COMPLETED
+            )
+
+            if interrupt_task in done:
+                # Interrupted: cancel the remaining sub-agent tasks and let them
+                # unwind; their outcomes are collected rather than raised.
+                for t in pending:
+                    t.cancel()
+                await asyncio.gather(*pending, return_exceptions=True)
+                return True
+
+            # Retrieve exceptions from completed sub-agent tasks so they
+            # are not silently lost.
+            for t in done:
+                # Task.exception() raises CancelledError for a cancelled task,
+                # which the caller's `except Exception` would not catch.
+                if t.cancelled():
+                    logger.warning("Sub-agent task was cancelled before completing")
+                    continue
+                exc = t.exception()
+                if exc:
+                    logger.warning("Sub-agent task raised an exception: %s", exc)
+        return False
+    finally:
+        # Always release the interrupt waiter, including on error paths.
+        if not interrupt_task.done():
+            interrupt_task.cancel()
+            try:
+                await interrupt_task
+            except asyncio.CancelledError:
+                pass
+
+
+class Tool(BaseTool):
+    """Agent tool that ends the current task and hands control back."""
+
+    NORM_NAME = "yield"
+    TRACK_INVOCATIONS = False
+    SCHEMA = {
+        "type": "function",
+        "function": {
+            "name": "Yield",
+            "description": "Yield control back to the user, indicating all sub-goals are complete.",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "summary": {
+                        "type": "string",
+                        "description": (
+                            "Optional summary of what was accomplished. "
+                            "When called by a sub-agent, this summary is captured "
+                            "and returned to the parent agent."
+                        ),
+                    },
+                },
+                "required": [],
+            },
+        },
+    }
+
+    @classmethod
+    async def execute(cls, coder, **kwargs):
+        """
+        Mark that the current generation task needs no further effort.
+
+        This gives the LLM explicit control over when it can stop looping.
+        Sub-agent tasks of this coder that are still running are awaited
+        first; an interrupt during that wait returns early without marking
+        the agent as finished.
+
+        Args:
+            coder: The coder invoking the tool.
+            **kwargs: Tool arguments; only the optional "summary" is read.
+
+        Returns:
+            A status string: "Yielded." (optionally with the summary), the
+            interruption notice, or an error message when ``coder`` is falsy.
+        """
+        cls.clear_invocation_cache()
+
+        if not coder:
+            return "Error: Could not yield control"
+
+        # Check for active child sub-agents and await their tasks before finishing
+        try:
+            interrupted = await _await_child_agents(coder)
+        except Exception as e:
+            # Best effort: a failure while waiting must not block the yield.
+            logger.warning("Error awaiting child sub-agents before yield: %s", e)
+            interrupted = False
+
+        if interrupted:
+            return (
+                "Yield interrupted while waiting for sub-agents. "
+                "Sub-agent outputs above may be incomplete."
+            )
+
+        coder.agent_finished = True
+
+        # If this is a sub-agent, capture the summary for the parent
+        summary = kwargs.get("summary", None)
+        parent_uuid = coder.parent_uuid
+        if parent_uuid:
+            try:
+                from cecli.helpers.agents.service import AgentService
+
+                AgentService.mark_sub_agent_finished(
+                    sub_coder_uuid=coder.uuid,
+                    parent_uuid=parent_uuid,
+                    summary=summary,
+                )
+            except Exception as e:
+                # Best effort: the yield still succeeds, but leave a trace.
+                logger.warning("Could not record sub-agent completion: %s", e)
+
+        if coder.files_edited_by_tools:
+            _ = await coder.auto_commit(coder.files_edited_by_tools)
+            coder.files_edited_by_tools = set()
+
+        if summary:
+            return f"Yielded. Summary: {summary}"
+        return "Yielded."
+
+    @classmethod
+    def format_output(cls, coder, mcp_server, tool_response):
+        """Render the tool call for the user: header, optional summary, footer."""
+        color_start, color_end = color_markers(coder)
+        params = json.loads(tool_response.function.arguments)
+
+        tool_header(coder=coder, mcp_server=mcp_server, tool_response=tool_response)
+
+        summary = params.get("summary")
+        if summary:
+            coder.io.tool_output("")
+            coder.io.tool_output(f"{color_start}Summary:{color_end}")
+            coder.io.tool_output(summary)
+            coder.io.tool_output("")
+
+        tool_footer(coder=coder, tool_response=tool_response)
diff --git a/cecli/tools/delegate.py b/cecli/tools/delegate.py
index e01cebc2e17..660db6c276d 100644
--- a/cecli/tools/delegate.py
+++ b/cecli/tools/delegate.py
@@ -65,37 +65,34 @@ async def execute(cls, coder, **kwargs):
         from cecli.helpers.agents.service import AgentService
 
         agent_service = AgentService.get_instance(coder)
-        # Track results with status flag instead of fragile emoji checks
-        results: list[tuple[bool, str]] = []
 
-        async def _run_one(name: str, prompt: str) -> tuple[bool, str]:
-            """Run a single sub-agent and return a (success, formatted_message) tuple."""
+        async def _spawn_one(name: str, prompt: str) -> tuple[str, str]:
+            """Spawn a single sub-agent and return (name, uuid_or_error)."""
             try:
-                agent_service._check_max_sub_agents()
-                summary = await agent_service.invoke(name, prompt, parent=coder, blocking=True)
-                if summary:
-                    return True, f"Sub-agent '{name}' completed:\n{summary}"
-                return True, f"Sub-agent '{name}' completed (no summary)."
-            except (ValueError, RuntimeError) as e:
-                return False, f"Sub-agent '{name}' failed: {e}"
+                new_coder, info = await agent_service.spawn(name, prompt, parent=coder)
+                return name, info.coder.uuid
             except Exception as e:
-                return False, f"Sub-agent '{name}' failed with unexpected error: {e}"
+                return name, f"failed: {e}"
 
-        # Dispatch all delegations in parallel
-        tasks = [_run_one(d["name"], d["prompt"]) for d in delegations]
+        # Dispatch all delegations in parallel (spawn is fire-and-forget, but
+        # _create_sub_agent_coder is async so we gather for concurrency)
+        tasks = [_spawn_one(d["name"], d["prompt"]) for d in delegations]
         raw_results = await asyncio.gather(*tasks)
 
-        # Separate success flag from message
-        for success, msg in raw_results:
-            results.append((success, msg))
+        started_agents: list[tuple[str, str]] = list(raw_results)
 
         # Build a consolidated report
-        n_ok = sum(1 for ok, _ in results if ok)
-        n_total = len(results)
-        separator = "\n" + "─" * 60 + "\n"
-        combined = separator.join(msg for _, msg in results)
-
-        return f"📋 Delegation results ({n_ok}/{n_total} succeeded):" f"{separator}{combined}"
+        lines = []
+        for name, result in started_agents:
+            if result.startswith("failed:"):
+                lines.append(f"❌ **{name}**: {result}")
+            else:
+                lines.append(f"✅ **{name}** agent started with id `{result}`")
+
+        n_total = len(started_agents)
+        n_ok = sum(1 for _, r in started_agents if not r.startswith("failed:"))
+        combined = "\n".join(lines)
+        return f"📋 Delegation results ({n_ok}/{n_total} dispatched):\n{combined}"
 
     @classmethod
     def format_output(cls, coder, mcp_server, tool_response):
diff --git a/cecli/tools/finished.py b/cecli/tools/finished.py
deleted file mode 100644
index b099d1eca90..00000000000
--- a/cecli/tools/finished.py
+++ /dev/null
@@ -1,86 +0,0 @@
-import json
-
-from cecli.tools.utils.base_tool import BaseTool
-from cecli.tools.utils.output import color_markers, tool_footer, tool_header
-
-
-class Tool(BaseTool):
-    NORM_NAME = "finished"
-    TRACK_INVOCATIONS = False
-    SCHEMA = {
-        "type": "function",
-        "function": {
-            "name": "Finished",
-            "description": (
-                "Declare that we are done with every single sub goal and no further work is needed."
-            ),
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "summary": {
-                        "type": "string",
-                        "description": (
-                            "Optional summary of what was accomplished. "
-                            "When called by a sub-agent, this summary is captured "
-                            "and returned to the parent agent."
-                        ),
-                    },
-                },
-                "required": [],
-            },
-        },
-    }
-
-    @classmethod
-    async def execute(cls, coder, **kwargs):
-        """
-        Mark that the current generation task needs no further effort.
-
-        This gives the LLM explicit control over when it can stop looping
-        """
-        cls.clear_invocation_cache()
-
-        if coder:
-            coder.agent_finished = True
-
-            # If this is a sub-agent, capture the summary for the parent
-            summary = kwargs.get("summary", None)
-            parent_uuid = coder.parent_uuid
-            if parent_uuid:
-                try:
-                    from cecli.helpers.agents.service import AgentService
-
-                    AgentService.mark_sub_agent_finished(
-                        sub_coder_uuid=coder.uuid,
-                        parent_uuid=parent_uuid,
-                        summary=summary,
-                    )
-                except Exception:
-                    pass
-
-            if coder.files_edited_by_tools:
-                _ = await coder.auto_commit(coder.files_edited_by_tools)
-                coder.files_edited_by_tools = set()
-
-            if summary:
-                return f"Task Finished! Summary: {summary}"
-            return "Task Finished!"
-
-        # coder.io.tool_Error("Error: Could not mark agent task as finished")
-        return "Error: Could not mark agent task as finished"
-
-    @classmethod
-    def format_output(cls, coder, mcp_server, tool_response):
-        color_start, color_end = color_markers(coder)
-        params = json.loads(tool_response.function.arguments)
-
-        tool_header(coder=coder, mcp_server=mcp_server, tool_response=tool_response)
-
-        summary = params.get("summary")
-        if summary:
-            coder.io.tool_output("")
-            coder.io.tool_output(f"{color_start}Summary:{color_end}")
-            coder.io.tool_output(summary)
-            coder.io.tool_output("")
-
-        tool_footer(coder=coder, tool_response=tool_response)
diff --git a/cecli/tools/utils/registry.py b/cecli/tools/utils/registry.py
index 45f333301ff..fe0bffc8081 100644
--- a/cecli/tools/utils/registry.py
+++ b/cecli/tools/utils/registry.py
@@ -19,7 +19,7 @@ class ToolRegistry:
     """Registry for tool discovery and management."""
 
     _tools: Dict[str, Type] = {}  # normalized name -> Tool class
-    _essential_tools: Set[str] = {"contextmanager", "edittext", "finished"}
+    _essential_tools: Set[str] = {"contextmanager", "edittext", "yield"}
     _registry: Dict[str, Type] = {}  # cached filtered registry
     loaded_custom_tools: List[str] = []
 
diff --git a/cecli/website/docs/config/agent-mode.md b/cecli/website/docs/config/agent-mode.md
index 45c13d473c8..f6c29774c55 100644
--- a/cecli/website/docs/config/agent-mode.md
+++ b/cecli/website/docs/config/agent-mode.md
@@ -38,7 +38,7 @@ Agent Mode operates through a continuous loop where the LLM:
 3. **Executes editing tools** to make changes
 4. **Processes results** and continues exploration and editing until the task is complete
 
-This loop continues automatically until the `Finished` tool is called, or the maximum number of iterations is reached.
+This loop continues automatically until the `Yield` tool is called, or the maximum number of iterations is reached.
 
 ### Key Components
 
@@ -50,7 +50,7 @@ Agent Mode uses a centralized local tool registry that manages all available too
 - **Editing Tools**: `EditText`,
 - **Context Management Tools**: `ContextManager`, `GetLines`
 - **Git Tools**: `GitDiff`, `GitLog`, `GitShow`, `GitStatus`
-- **Utility Tools**: `UpdateTodoList`, `UndoChange`, `Finished`
+- **Utility Tools**: `UpdateTodoList`, `UndoChange`, `Yield`
 - **Skill Management**: `LoadSkill`, `RemoveSkill`
 - **Sub-Agent Tools**: `Delegate` - Delegate sub-tasks to specialized sub-agents
 
@@ -138,7 +138,7 @@ Arguments: {}
 The above continues over and over until:
 
 ```
-Tool Call: Finished
+Tool Call: Yield
 Arguments: {}
 ```
 
diff --git a/cecli/website/docs/config/subagents.md b/cecli/website/docs/config/subagents.md
index 1d20b3ae6a8..ac223f24830 100644
--- a/cecli/website/docs/config/subagents.md
+++ b/cecli/website/docs/config/subagents.md
@@ -147,7 +147,7 @@ When the limit is reached:
 
 ### Cleanup
 
-- **Normal completion**: A sub-agent calls `Finished(summary="...")` which marks it as finished. Its container remains visible but its resources are eligible for lazy cleanup.
+- **Normal completion**: A sub-agent calls `Yield(summary="...")` which marks it as finished. Its container remains visible but its resources are eligible for lazy cleanup.
 - **Session end**: When the parent session ends, all sub-agents are automatically cleaned up.
 - **Force cleanup**: Use `/reap-agent` to immediately destroy a sub-agent and reclaim all resources.
 
diff --git a/tests/subagents/test_commands.py b/tests/subagents/test_commands.py
index c8d55914e57..c5210736a0b 100644
--- a/tests/subagents/test_commands.py
+++ b/tests/subagents/test_commands.py
@@ -7,113 +7,6 @@
 import pytest
 
 
-class TestInvokeAgentCommand:
-    """Tests for InvokeAgentCommand."""
-
-    @pytest.mark.asyncio
-    async def test_no_args_shows_usage(self):
-        """Empty args shows usage error."""
-        from cecli.commands.invoke_agent import InvokeAgentCommand
-
-        io = MagicMock()
-        await InvokeAgentCommand.execute(io, None, "")
-
-        io.tool_error.assert_called_once()
-        assert "Usage" in io.tool_error.call_args[0][0]
-
-    @pytest.mark.asyncio
-    async def test_name_only_no_prompt(self):
-        """Name without prompt passes empty string."""
-        from cecli.commands.invoke_agent import InvokeAgentCommand
-
-        io = MagicMock()
-        coder = MagicMock()
-
-        with patch("cecli.helpers.agents.service.AgentService") as MockSvc:
-            mock_instance = MagicMock()
-            mock_instance.invoke = AsyncMock(return_value="ok")
-            MockSvc.get_instance.return_value = mock_instance
-
-            await InvokeAgentCommand.execute(io, coder, "reviewer")
-
-        mock_instance.invoke.assert_called_once_with("reviewer", "", blocking=True)
-
-    @pytest.mark.asyncio
-    async def test_name_with_prompt(self):
-        """Name with prompt passes prompt correctly."""
-        from cecli.commands.invoke_agent import InvokeAgentCommand
-
-        io = MagicMock()
-        coder = MagicMock()
-
-        with patch("cecli.helpers.agents.service.AgentService") as MockSvc:
-            mock_instance = MagicMock()
-            mock_instance.invoke = AsyncMock(return_value="done")
-            MockSvc.get_instance.return_value = mock_instance
-
-            await InvokeAgentCommand.execute(io, coder, "reviewer review this")
-
-        mock_instance.invoke.assert_called_once_with("reviewer", "review this", blocking=True)
-
-    @pytest.mark.asyncio
-    async def test_value_error_shown_as_error(self):
-        """ValueError from service shown via io.tool_error."""
-        from cecli.commands.invoke_agent import InvokeAgentCommand
-
-        io = MagicMock()
-        coder = MagicMock()
-
-        with patch("cecli.helpers.agents.service.AgentService") as MockSvc:
-            mock_instance = MagicMock()
-            mock_instance.invoke = AsyncMock(side_effect=ValueError("unknown"))
-            MockSvc.get_instance.return_value = mock_instance
-
-            await InvokeAgentCommand.execute(io, coder, "ghost go")
-
-        io.tool_error.assert_called()
-        assert "unknown" in io.tool_error.call_args[0][0]
-
-    @pytest.mark.asyncio
-    async def test_runtime_error_shown_as_error(self):
-        """RuntimeError from service shown via io.tool_error."""
-        from cecli.commands.invoke_agent import InvokeAgentCommand
-
-        io = MagicMock()
-        coder = MagicMock()
-
-        with patch("cecli.helpers.agents.service.AgentService") as MockSvc:
-            mock_instance = MagicMock()
-            mock_instance.invoke = AsyncMock(side_effect=RuntimeError("max reached"))
-            MockSvc.get_instance.return_value = mock_instance
-
-            await InvokeAgentCommand.execute(io, coder, "reviewer go")
-
-        io.tool_error.assert_called()
-        assert "max reached" in io.tool_error.call_args[0][0]
-
-    @pytest.mark.asyncio
-    async def test_summary_output_on_completion(self):
-        """Successful completion shows summary via io.tool_output."""
-        from cecli.commands.invoke_agent import InvokeAgentCommand
-
-        io = MagicMock()
-        coder = MagicMock()
-
-        with patch("cecli.helpers.agents.service.AgentService") as MockSvc:
-            mock_instance = MagicMock()
-            mock_instance.invoke = AsyncMock(return_value="task done")
-            MockSvc.get_instance.return_value = mock_instance
-
-            with patch("cecli.helpers.conversation.service.ConversationService") as MockCS:
-                mock_manager = MagicMock()
-                MockCS.get_manager.return_value = mock_manager
-
-                await InvokeAgentCommand.execute(io, coder, "reviewer do it")
-
-        io.tool_output.assert_called_once()
-        assert "task done" in io.tool_output.call_args[0][0]
-
-
 class TestSpawnAgentCommand:
     """Tests for SpawnAgentCommand."""
 
@@ -143,7 +36,7 @@ async def test_valid_name_calls_spawn(self):
 
             await SpawnAgentCommand.execute(io, coder, "reviewer")
 
-        mock_instance.spawn.assert_called_once_with("reviewer")
+        mock_instance.spawn.assert_called_once_with("reviewer", None, parent=coder)
         io.tool_output.assert_called_once()
         assert "spawned" in io.tool_output.call_args[0][0]
 
diff --git a/tests/subagents/test_delegate.py b/tests/subagents/test_delegate.py
index 2ec5cc23d4c..97cb4ec9cf2 100644
--- a/tests/subagents/test_delegate.py
+++ b/tests/subagents/test_delegate.py
@@ -38,8 +38,8 @@ async def test_both_empty_returns_name_error(self):
         assert "name" in result
 
     @pytest.mark.asyncio
-    async def test_valid_delegate_calls_invoke(self):
-        """Valid params call AgentService.invoke with correct args."""
+    async def test_valid_delegate_calls_spawn(self):
+        """Valid params call AgentService.spawn with correct args."""
         from cecli.tools.delegate import Tool
 
         mock_coder = MagicMock()
@@ -47,7 +47,10 @@ async def test_valid_delegate_calls_invoke(self):
 
         with patch("cecli.helpers.agents.service.AgentService") as MockService:
             mock_instance = MagicMock()
-            mock_instance.invoke = AsyncMock(return_value="review summary")
+            # spawn returns (new_coder, info); info.coder.uuid is used in output
+            mock_info = MagicMock()
+            mock_info.coder.uuid = "child-uuid-123"
+            mock_instance.spawn = AsyncMock(return_value=(MagicMock(), mock_info))
             MockService.get_instance.return_value = mock_instance
 
             result = await Tool.execute(
@@ -55,49 +58,65 @@ async def test_valid_delegate_calls_invoke(self):
             )
 
             MockService.get_instance.assert_called_once_with(mock_coder)
-            mock_instance.invoke.assert_called_once_with("reviewer", "review this", blocking=True)
-            assert "review summary" in result
+            mock_instance.spawn.assert_called_once_with(
+                "reviewer", "review this", parent=mock_coder
+            )
+            assert "agent started with id" in result
+            assert "child-uuid-123" in result
 
-    @pytest.mark.asyncio
-    async def test_delegate_no_summary(self):
-        """When invoke returns None, returns appropriate message."""
+    async def test_delegate_multiple_delegations(self):
+        """Multiple delegations show correct dispatch count."""
         from cecli.tools.delegate import Tool
 
         mock_coder = MagicMock()
+        mock_coder.uuid = "parent-uuid"
+
         with patch("cecli.helpers.agents.service.AgentService") as MockService:
             mock_instance = MagicMock()
-            mock_instance.invoke = AsyncMock(return_value=None)
+
+            async def spawn_side_effect(name, prompt, parent=None):
+                mock_info = MagicMock()
+                mock_info.coder.uuid = f"{name}-uuid"
+                return MagicMock(), mock_info
+
+            mock_instance.spawn = AsyncMock(side_effect=spawn_side_effect)
             MockService.get_instance.return_value = mock_instance
 
             result = await Tool.execute(
-                mock_coder, delegations=[{"name": "tester", "prompt": "test"}]
+                mock_coder,
+                delegations=[
+                    {"name": "agent1", "prompt": "task1"},
+                    {"name": "agent2", "prompt": "task2"},
+                ],
             )
-            assert "completed (no summary)" in result
+
+            assert "2/2 dispatched" in result
+            assert "agent1" in result
+            assert "agent2" in result
 
     @pytest.mark.asyncio
-    async def test_delegate_value_error_returns_error_string(self):
-        """ValueError from service returns error string."""
+    async def test_delegate_spawn_error_returns_error_string(self):
+        """Error from spawn returns error string."""
         from cecli.tools.delegate import Tool
 
         mock_coder = MagicMock()
         with patch("cecli.helpers.agents.service.AgentService") as MockService:
             mock_instance = MagicMock()
-            mock_instance.invoke = AsyncMock(side_effect=ValueError("unknown agent"))
+            mock_instance.spawn = AsyncMock(side_effect=ValueError("unknown agent"))
             MockService.get_instance.return_value = mock_instance
 
             result = await Tool.execute(mock_coder, delegations=[{"name": "ghost", "prompt": "x"}])
             assert "failed" in result
             assert "unknown agent" in result
 
-    @pytest.mark.asyncio
     async def test_delegate_runtime_error_returns_error_string(self):
-        """RuntimeError from service returns error string."""
+        """RuntimeError from spawn returns error string."""
         from cecli.tools.delegate import Tool
 
         mock_coder = MagicMock()
         with patch("cecli.helpers.agents.service.AgentService") as MockService:
             mock_instance = MagicMock()
-            mock_instance.invoke = AsyncMock(side_effect=RuntimeError("max reached"))
+            mock_instance.spawn = AsyncMock(side_effect=RuntimeError("max reached"))
             MockService.get_instance.return_value = mock_instance
 
             result = await Tool.execute(
@@ -106,7 +125,6 @@ async def test_delegate_runtime_error_returns_error_string(self):
             assert "failed" in result
             assert "max reached" in result
 
-    @pytest.mark.asyncio
     async def test_unexpected_exception_caught(self):
         """Any other exception returns error string (doesn't propagate)."""
         from cecli.tools.delegate import Tool
@@ -114,11 +132,11 @@ async def test_unexpected_exception_caught(self):
         mock_coder = MagicMock()
         with patch("cecli.helpers.agents.service.AgentService") as MockService:
             mock_instance = MagicMock()
-            mock_instance.invoke = AsyncMock(side_effect=Exception("unexpected"))
+            mock_instance.spawn = AsyncMock(side_effect=Exception("unexpected"))
             MockService.get_instance.return_value = mock_instance
 
             result = await Tool.execute(
                 mock_coder, delegations=[{"name": "reviewer", "prompt": "x"}]
             )
-            assert "failed with unexpected error" in result
+            assert "failed" in result
             assert "unexpected" in result
diff --git a/tests/subagents/test_finished.py b/tests/subagents/test_finished.py
index ce1137f0a8f..2941a374745 100644
--- a/tests/subagents/test_finished.py
+++ b/tests/subagents/test_finished.py
@@ -8,12 +8,12 @@
 
 
 class TestFinishedTool:
-    """Tests for the Finished tool sub-agent behavior."""
+    """Tests for the Yield tool sub-agent behavior."""
 
     @pytest.mark.asyncio
     async def test_sets_agent_finished_on_coder(self):
         """Sets coder.agent_finished = True."""
-        from cecli.tools.finished import Tool
+        from cecli.tools._yield import Tool
 
         mock_coder = MagicMock()
         mock_coder.parent_uuid = ""
@@ -27,7 +27,7 @@ async def test_sets_agent_finished_on_coder(self):
     async def test_sub_agent_with_summary_updates_info(self):
         """Sub-agent with summary updates SubAgentInfo.summary and status."""
         from cecli.helpers.agents.service import AgentService, SubAgentStatus
-        from cecli.tools.finished import Tool
+        from cecli.tools._yield import Tool
 
         mock_coder = MagicMock()
         mock_coder.uuid = "sub-uuid"
@@ -51,7 +51,7 @@ async def test_sub_agent_with_summary_updates_info(self):
     @pytest.mark.asyncio
     async def test_sub_agent_without_summary(self):
         """Sub-agent without summary kwarg doesn't crash."""
-        from cecli.tools.finished import Tool
+        from cecli.tools._yield import Tool
 
         mock_coder = MagicMock()
         mock_coder.uuid = "sub-uuid"
@@ -59,25 +59,25 @@ async def test_sub_agent_without_summary(self):
         mock_coder.files_edited_by_tools = set()
 
         result = await Tool.execute(mock_coder)
-        assert result == "Task Finished!"
+        assert result == "Yielded."
 
     @pytest.mark.asyncio
     async def test_non_sub_agent_skips_lookup(self):
         """Coder without parent_uuid skips sub-agent lookup."""
-        from cecli.tools.finished import Tool
+        from cecli.tools._yield import Tool
 
         mock_coder = MagicMock()
         mock_coder.parent_uuid = ""
         mock_coder.files_edited_by_tools = set()
 
         result = await Tool.execute(mock_coder)
-        assert result == "Task Finished!"
+        assert result == "Yielded."
 
     @pytest.mark.asyncio
     async def test_unknown_parent_uuid_caught_gracefully(self):
         """Sub-agent with parent not in _instances is caught silently."""
         from cecli.helpers.agents.service import AgentService
-        from cecli.tools.finished import Tool
+        from cecli.tools._yield import Tool
 
         mock_coder = MagicMock()
         mock_coder.uuid = "sub-uuid"
@@ -88,10 +88,9 @@ async def test_unknown_parent_uuid_caught_gracefully(self):
             result = await Tool.execute(mock_coder, summary="done")
             assert "Summary: done" in result
 
-    @pytest.mark.asyncio
     async def test_returns_summary_in_response(self):
         """When summary provided, response includes it."""
-        from cecli.tools.finished import Tool
+        from cecli.tools._yield import Tool
 
         mock_coder = MagicMock()
         mock_coder.parent_uuid = ""
@@ -103,7 +102,7 @@ async def test_returns_summary_in_response(self):
     @pytest.mark.asyncio
     async def test_coder_is_none_returns_error(self):
         """When coder is None, returns error string."""
-        from cecli.tools.finished import Tool
+        from cecli.tools._yield import Tool
 
         result = await Tool.execute(None)
         assert "Error" in result
diff --git a/tests/subagents/test_service.py b/tests/subagents/test_service.py
index 4d3ba4c555c..e866603066d 100644
--- a/tests/subagents/test_service.py
+++ b/tests/subagents/test_service.py
@@ -471,71 +471,82 @@ class TestWait:
     """AgentService.wait() behavior."""
 
     @pytest.mark.asyncio
-    async def test_unknown_name_raises(self, service):
-        """Unknown name raises ValueError."""
-        with pytest.raises(ValueError, match="No sub-agent named"):
-            await service.wait("ghost")
+    async def test_no_children_returns_empty_list(self, service):
+        """Parent with no children returns empty list."""
+        parent_coder = MagicMock()
+        parent_coder.uuid = "parent-uuid"
+        result = await service.wait(parent_coder)
+        assert result == []
 
     @pytest.mark.asyncio
-    async def test_wait_finished_returns_summary(self, service):
-        """Already FINISHED returns summary immediately."""
+    async def test_wait_finished_returns_summary_list(self, service):
+        """Already FINISHED returns summary in a list."""
+        parent_coder = MagicMock()
+        parent_coder.uuid = "parent-uuid"
         info = SubAgentInfo(
             name="agent",
             coder=MagicMock(),
-            parent_uuid="parent",
+            parent_uuid="parent-uuid",
             status=SubAgentStatus.FINISHED,
             summary="done",
         )
+        info.generate_task = None
         service.sub_agents["agent"] = info
         service._sub_agent_order.append("agent")
 
-        result = await service.wait("agent")
-        assert result == "done"
+        result = await service.wait(parent_coder)
+        assert result == ["done"]
 
     @pytest.mark.asyncio
-    async def test_wait_error_raises(self, service):
-        """ERROR status raises RuntimeError."""
+    async def test_wait_error_returns_none_summary(self, service):
+        """ERROR status returns list containing None summary."""
+        parent_coder = MagicMock()
+        parent_coder.uuid = "parent-uuid"
         info = SubAgentInfo(
             name="agent",
             coder=MagicMock(),
-            parent_uuid="parent",
+            parent_uuid="parent-uuid",
             status=SubAgentStatus.ERROR,
             error="something broke",
+            summary=None,
         )
+        info.generate_task = None
         service.sub_agents["agent"] = info
         service._sub_agent_order.append("agent")
 
-        with pytest.raises(RuntimeError, match="something broke"):
-            await service.wait("agent")
+        result = await service.wait(parent_coder)
+        assert result == [None]
 
     @pytest.mark.asyncio
     async def test_wait_polls_until_finished(self, service):
-        """Polls until status is FINISHED then returns summary."""
+        """Polls via generate_task until FINISHED then returns summary."""
+        import asyncio
+
+        parent_coder = MagicMock()
+        parent_coder.uuid = "parent-uuid"
+
         info = SubAgentInfo(
             name="agent",
             coder=MagicMock(),
-            parent_uuid="parent",
+            parent_uuid="parent-uuid",
             status=SubAgentStatus.CREATED,
         )
-        service.sub_agents["agent"] = info
-        service._sub_agent_order.append("agent")
 
-        # Simulate the sub-agent finishing after a brief delay
         async def finish_later():
-            import asyncio
-
-            await asyncio.sleep(0.1)
+            await asyncio.sleep(0.05)
             info.status = SubAgentStatus.FINISHED
             info.summary = "completed"
 
-        import asyncio
+        # Create a generate_task that completes when finish_later runs
+        async def gen_task():
+            await finish_later()
 
-        await asyncio.gather(
-            service.wait("agent"),
-            finish_later(),
-        )
+        info.generate_task = asyncio.create_task(gen_task())
+        service.sub_agents["agent"] = info
+        service._sub_agent_order.append("agent")
 
-        assert info.summary == "completed"
+        result = await service.wait(parent_coder)
+        assert result == ["completed"]
 
 
 # ================================================================== #
diff --git a/tests/tools/test_registry.py b/tests/tools/test_registry.py
index a0a9b34be34..7f540e38bde 100644
--- a/tests/tools/test_registry.py
+++ b/tests/tools/test_registry.py
@@ -28,7 +28,7 @@ def test_registry_initialization(self):
         assert len(tools) > 0, "Registry should have tools after initialization"
 
         # Check that essential tools are registered
-        essential_tools = {"contextmanager", "edittext", "finished"}
+        essential_tools = {"contextmanager", "edittext", "yield"}
         for tool in essential_tools:
             assert tool in tools, f"Essential tool {tool} should be registered"
 
@@ -54,7 +54,7 @@ def test_build_registry_empty_config(self):
         # Essential tools should always be included
         assert "contextmanager" in registry, "Essential tool should be included"
         assert "edittext" in registry, "Essential tool should be included"
-        assert "finished" in registry, "Essential tool should be included"
+        assert "yield" in registry, "Essential tool should be included"
 
     def test_build_registry_with_includelist(self):
         """Test filtering with tools_includelist"""
@@ -65,7 +65,7 @@ def test_build_registry_with_includelist(self):
         assert len(registry) == 3, "Should include 2 from list + 1 essential"
         assert "contextmanager" in registry
         assert "edittext" in registry
-        assert "finished" in registry  # Essential
+        assert "yield" in registry  # Essential
         assert "command" not in registry, "Should not include tools not in includelist"
 
     def test_build_registry_with_excludelist(self):
@@ -86,7 +86,7 @@ def test_build_registry_exclude_essential(self):
         # Essential tools should still be included despite excludelist
         assert "contextmanager" in registry, "Essential tool cannot be excluded"
         assert "edittext" in registry, "Essential tool cannot be excluded"
-        assert "finished" in registry, "Essential tool cannot be excluded"
+        assert "yield" in registry, "Essential tool cannot be excluded"
         assert "command" not in registry, "Non-essential tool should be excluded"
 
     def test_build_registry_combined_filters(self):
@@ -98,10 +98,10 @@ def test_build_registry_combined_filters(self):
         registry = ToolRegistry.build_registry(config)
 
         # Should respect all filters
-        assert len(registry) == 4, "Should include exactly 4 tools (3 from list + finished)"
+        assert len(registry) == 4, "Should include exactly 4 tools (3 from list + yield)"
         assert "contextmanager" in registry
         assert "edittext" in registry
-        assert "finished" in registry
+        assert "yield" in registry
         assert "command" in registry
         assert "commandinteractive" not in registry
 
@@ -117,7 +117,7 @@ def test_get_filtered_tools(self):
         assert len(tool_names) == 3
         assert "contextmanager" in tool_names
         assert "edittext" in tool_names
-        assert "finished" in tool_names  # Essential tool always included
+        assert "yield" in tool_names  # Essential tool always included
 
     def test_legacy_config_names(self):
         """Test backward compatibility with legacy config names (whitelist/blacklist)"""
@@ -148,7 +148,7 @@ def test_config_precedence(self):
             "command" not in registry
         ), "Should not use tools_whitelist when tools_includelist present"
         assert "commandinteractive" not in registry, "Should use tools_excludelist"
-        assert "finished" in registry, "Essential tool cannot be excluded"
+        assert "yield" in registry, "Essential tool cannot be excluded"
 
     def test_registry_consistency(self):
         """Test that registry methods return consistent results"""

From fb996af203263ff5b1b4eefa7214ee5946ecc90c Mon Sep 17 00:00:00 2001
From: Your Name <you@example.com>
Date: Sat, 30 May 2026 13:29:22 -0400
Subject: [PATCH 22/37] Fix yield tool so it does not finish prematurely

---
 cecli/tools/_yield.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/cecli/tools/_yield.py b/cecli/tools/_yield.py
index c1372c1bdca..1e6bb12b47c 100644
--- a/cecli/tools/_yield.py
+++ b/cecli/tools/_yield.py
@@ -104,7 +104,12 @@ async def execute(cls, coder, **kwargs):
                         except asyncio.CancelledError:
                             pass
 
-                    # Fall through to the normal finishing flow below
+                    # Don't mark as finished — the coder should review sub-agent
+                    # outputs and decide how to proceed
+                    return (
+                        "Sub-agents have finished. Please examine their output above "
+                        "in order to decide how you will proceed."
+                    )
             except Exception as e:
                 logger.warning("Error awaiting child sub-agents before yield: %s", e)
 

From 99c62a08ead1a2a846fa51960801bcb5d4403ca3 Mon Sep 17 00:00:00 2001
From: Your Name <you@example.com>
Date: Sat, 30 May 2026 15:06:19 -0400
Subject: [PATCH 23/37] Allow automatic reaping of sub agents

---
 cecli/coders/agent_coder.py            |   8 ++
 cecli/commands/spawn_agent.py          |   2 +-
 cecli/helpers/agents/config.py         |   8 +-
 cecli/helpers/agents/service.py        | 167 ++++++++++++++++++++++---
 cecli/tools/_yield.py                  |  12 +-
 cecli/website/docs/config/subagents.md |  23 +++-
 tests/subagents/test_commands.py       |   2 +-
 7 files changed, 201 insertions(+), 21 deletions(-)

diff --git a/cecli/coders/agent_coder.py b/cecli/coders/agent_coder.py
index 822e640d0b5..ac384c4c39a 100644
--- a/cecli/coders/agent_coder.py
+++ b/cecli/coders/agent_coder.py
@@ -875,6 +875,13 @@ async def reply_completed(self):
         content = self.partial_response_content
         tool_calls_found = bool(self.partial_response_tool_calls)
 
+        # Reap all finished sub-agents with auto_reap enabled
+        try:
+            service = AgentService.get_instance(self)
+            await service.reap_all_finished_agents(parent=service.get_parent(self))
+        except Exception:
+            logger.warning("Failed to reap finished sub-agents", exc_info=True)
+
         # 1. Handle Tool Execution Follow-up (Reflection)
         if self.agent_finished:
             self.tool_usage_history = []
@@ -882,6 +889,7 @@ async def reply_completed(self):
             self.reflected_message = None
             if self.files_edited_by_tools:
                 _ = await self.auto_commit(self.files_edited_by_tools)
+
             return False
 
         # 2. Check for unfinished and recently finished background commands
diff --git a/cecli/commands/spawn_agent.py b/cecli/commands/spawn_agent.py
index 33ffe3c7953..de77578713f 100644
--- a/cecli/commands/spawn_agent.py
+++ b/cecli/commands/spawn_agent.py
@@ -27,7 +27,7 @@ async def execute(cls, io, coder, args, **kwargs):
 
         try:
             agent_service = AgentService.get_instance(coder)
-            await agent_service.spawn(name, prompt, parent=coder)
+            await agent_service.spawn(name, prompt, parent=coder, auto_reap=False)
 
             if coder.tui and coder.tui():
                 switch_key = coder.tui().get_keys_for("next_agent")
diff --git a/cecli/helpers/agents/config.py b/cecli/helpers/agents/config.py
index d054c823773..b99ab76bf15 100644
--- a/cecli/helpers/agents/config.py
+++ b/cecli/helpers/agents/config.py
@@ -19,6 +19,7 @@ class SubAgentConfig:
     prompt: str = ""
     model: Optional[str] = None
     hooks: Dict[str, Any] = field(default_factory=dict)
+    auto_reap: Optional[bool] = None
     metadata: Dict[str, Any] = field(default_factory=dict)
 
 
@@ -71,13 +72,18 @@ def parse_subagent_file(file_path: str) -> Optional[SubAgentConfig]:
     hooks_data = frontmatter_data.get("hooks", {})
     if not isinstance(hooks_data, dict):
         hooks_data = {}
-    metadata = {k: v for k, v in frontmatter_data.items() if k not in ("name", "model", "hooks")}
+    metadata = {
+        k: v
+        for k, v in frontmatter_data.items()
+        if k not in ("name", "model", "hooks", "auto_reap")
+    }
 
     config = SubAgentConfig(
         name=name,
         prompt=prompt,
         model=frontmatter_data.get("model"),
         hooks=hooks_data,
+        auto_reap=frontmatter_data.get("auto_reap"),
         metadata=metadata,
     )
 
diff --git a/cecli/helpers/agents/service.py b/cecli/helpers/agents/service.py
index 34fe637901b..6ec7d6e0dd2 100644
--- a/cecli/helpers/agents/service.py
+++ b/cecli/helpers/agents/service.py
@@ -47,6 +47,7 @@ class SubAgentInfo:
     generate_task: Optional[asyncio.Task] = (
         None  # Track the generate() task for cancellation/monitoring
     )
+    auto_reap: bool = True  # If True, agent may be automatically reaped when FINISHED
 
 
 class AgentService:
@@ -237,13 +238,84 @@ def _get_tui(coder: Any) -> Any:
     # ------------------------------------------------------------------ #
 
     def _reap_finished_agent(self) -> None:
-        """Remove the oldest FINISHED sub-agent (lazy reap)."""
+        """Remove the oldest FINISHED or ERROR sub-agent (lazy reap).
+
+        Only reaps sub-agents whose descendants (children, grandchildren, etc.)
+        have all also finished.  This prevents reaping a sub-agent while it
+        still has running descendant tasks that its ``generate()`` loop may
+        need to process.
+        """
+        # Build parent → children mapping
+        parent_to_children: Dict[str, List[SubAgentInfo]] = {}
+        for info in self.sub_agents.values():
+            parent_to_children.setdefault(info.parent_uuid, []).append(info)
+
+        def _has_unfinished_descendants(agent_uuid: str) -> bool:
+            """Return True if *agent_uuid* has any non-FINISHED/non-ERROR descendant."""
+            for child in parent_to_children.get(agent_uuid, []):
+                if child.status not in (SubAgentStatus.FINISHED, SubAgentStatus.ERROR):
+                    return True
+                if _has_unfinished_descendants(child.coder.uuid):
+                    return True
+            return False
+
         for coder_uuid in list(self._sub_agent_order):
             info = self.sub_agents.get(coder_uuid)
-            if info and info.status == SubAgentStatus.FINISHED:
+            if (
+                info
+                and info.status in (SubAgentStatus.FINISHED, SubAgentStatus.ERROR)
+                and info.auto_reap
+                and (info.generate_task is None or info.generate_task.done())
+                and not _has_unfinished_descendants(coder_uuid)
+            ):
                 self._cleanup_sub_agent(coder_uuid)
                 return
 
+    async def reap_all_finished_agents(self, parent: Any = None) -> None:
+        """Remove all FINISHED or ERROR sub-agents that have ``auto_reap`` enabled.
+
+        Builds a parent→children mapping of all sub-agents and only reaps
+        finished sub-agents whose descendants (children, grandchildren, etc.)
+        have all also finished.  This prevents reaping a sub-agent while it
+        still has running descendant tasks that its ``generate()`` loop may
+        need to process.  Acquires the spawn lock for the given *parent*
+        (or ``self.coder`` if omitted) to serialise with concurrent
+        ``_create_sub_agent_coder()`` operations under the same parent.
+
+        Args:
+            parent: Optional coder instance whose spawn lock will be acquired.
+                    If provided, reaping is serialised against spawns under this
+                    specific parent. Defaults to ``self.coder``.
+        """
+        # Build parent → children mapping
+        parent_to_children: Dict[str, List[SubAgentInfo]] = {}
+        for info in self.sub_agents.values():
+            parent_to_children.setdefault(info.parent_uuid, []).append(info)
+
+        def _has_unfinished_descendants(agent_uuid: str) -> bool:
+            """Return True if *agent_uuid* has any non-FINISHED/non-ERROR descendant."""
+            for child in parent_to_children.get(agent_uuid, []):
+                if child.status not in (SubAgentStatus.FINISHED, SubAgentStatus.ERROR):
+                    return True
+                if _has_unfinished_descendants(child.coder.uuid):
+                    return True
+            return False
+
+        # Acquire the spawn lock keyed by the resolved parent coder's UUID to
+        # serialise with concurrent spawn operations that also hold this lock.
+        parent_coder = parent if parent is not None else self.coder
+        async with self._get_lock(self._spawn_locks, parent_coder.uuid):
+            for coder_uuid in list(self._sub_agent_order):
+                info = self.sub_agents.get(coder_uuid)
+                if (
+                    info
+                    and info.status in (SubAgentStatus.FINISHED, SubAgentStatus.ERROR)
+                    and info.auto_reap
+                    and (info.generate_task is None or info.generate_task.done())
+                    and not _has_unfinished_descendants(coder_uuid)
+                ):
+                    self._cleanup_sub_agent(coder_uuid)
+
     def _cleanup_sub_agent(self, agent_uuid: str) -> None:
         """Remove agent instance from tracking and notify TUI if possible."""
         info = self.sub_agents.pop(agent_uuid, None)
@@ -301,7 +373,9 @@ def _check_max_sub_agents(self) -> None:
         Raises RuntimeError if no finished agents can be reaped.
         """
         active_count = sum(
-            1 for info in self.sub_agents.values() if info.status != SubAgentStatus.FINISHED
+            1
+            for info in self.sub_agents.values()
+            if info.status not in (SubAgentStatus.FINISHED, SubAgentStatus.ERROR)
         )
         if active_count < self.max_sub_agents:
             return
@@ -311,7 +385,9 @@ def _check_max_sub_agents(self) -> None:
 
         # Recalculate active count after reaping
         active_count = sum(
-            1 for info in self.sub_agents.values() if info.status != SubAgentStatus.FINISHED
+            1
+            for info in self.sub_agents.values()
+            if info.status not in (SubAgentStatus.FINISHED, SubAgentStatus.ERROR)
         )
         if active_count >= self.max_sub_agents:
             raise RuntimeError(
@@ -320,7 +396,7 @@ def _check_max_sub_agents(self) -> None:
             )
 
     async def _create_sub_agent_coder(
-        self, name: str, parent: Any = None
+        self, name: str, parent: Any = None, auto_reap: Optional[bool] = None
     ) -> Tuple[Any, SubAgentInfo]:
         """Create a sub-agent coder, register it, and set up its container and prompt.
 
@@ -329,10 +405,13 @@ async def _create_sub_agent_coder(
 
         Args:
             name: Name of the sub-agent to create.
-            parent: Optional coder instance to use as the parent.
-                    If provided, the new sub-agent's ``parent_uuid`` will be
-                    ``parent.uuid`` instead of ``self.coder.uuid``, enabling
-                    nested sub-agent hierarchies. Defaults to ``self.coder``.
+            parent: Optional coder instance to use as the parent for nested
+                    sub-agent hierarchies. If provided, the new sub-agent's
+                    ``parent_uuid`` will be ``parent.uuid`` instead of
+                    ``self.coder.uuid``. Defaults to ``self.coder``.
+            auto_reap: If True, agent may be automatically reaped when FINISHED.
+                    If not set, defers to the sub-agent config's ``auto_reap``
+                    value, then defaults to ``True``.
 
         Returns:
             Tuple of ``(new_coder, info)``.
@@ -347,6 +426,12 @@ async def _create_sub_agent_coder(
                 f"Unknown sub-agent '{name}'. " f"Available: {list(self._global_registry.keys())}"
             )
 
+        # Resolve auto_reap: None means defer to sub-agent config, then default to True
+        if auto_reap is None:
+            auto_reap = getattr(config, "auto_reap", None)
+            if auto_reap is None:
+                auto_reap = True
+
         # Critical section: max-sub-agent check and registration must be atomic
         # to prevent TOCTOU race when multiple spawns fire concurrently.
         # Coder.create() is called *outside* the lock to avoid holding an
@@ -393,8 +478,8 @@ async def _create_sub_agent_coder(
                 coder=new_coder,
                 parent_uuid=parent_coder.uuid,
                 status=SubAgentStatus.CREATED,
+                auto_reap=auto_reap,
             )
-
             self.sub_agents[new_coder.uuid] = info
             self._sub_agent_order.append(new_coder.uuid)
 
@@ -583,7 +668,12 @@ async def _inject_sub_agent_result(self, info: SubAgentInfo) -> None:
             )
 
     async def invoke(
-        self, name: str, prompt: str, blocking: bool = True, parent: Any = None
+        self,
+        name: str,
+        prompt: str,
+        blocking: bool = True,
+        parent: Any = None,
+        auto_reap: Optional[bool] = None,
     ) -> Optional[str]:
         """Invoke a sub-agent by name with the given prompt (blocking by default).
 
@@ -594,7 +684,9 @@ async def invoke(
             parent: Optional coder instance to use as the parent for nested
                    sub-agent hierarchies. Defaults to ``self.coder``.
         """
-        new_coder, info = await self._create_sub_agent_coder(name, parent)
+        new_coder, info = await self._create_sub_agent_coder(
+            name, auto_reap=auto_reap, parent=parent
+        )
 
         if not blocking:
             return None
@@ -605,7 +697,11 @@ async def invoke(
         return info.summary
 
     async def spawn(
-        self, name: str, prompt: Optional[str] = None, parent: Any = None
+        self,
+        name: str,
+        prompt: Optional[str] = None,
+        parent: Any = None,
+        auto_reap: Optional[bool] = None,
     ) -> Tuple[Any, SubAgentInfo]:
         """Spawn a sub-agent (non-blocking) that waits for user input.
 
@@ -620,7 +716,9 @@ async def spawn(
             Tuple of ``(new_coder, info)`` so callers can further interact
             with the sub-agent (e.g. call ``start_generate_task`` later).
         """
-        new_coder, info = await self._create_sub_agent_coder(name, parent)
+        new_coder, info = await self._create_sub_agent_coder(
+            name, auto_reap=auto_reap, parent=parent
+        )
         if prompt:
             self.start_generate_task(info, prompt)
         return new_coder, info
@@ -689,6 +787,47 @@ def get_children(self, coder_or_uuid: Any) -> List[SubAgentInfo]:
 
         return [info for info in self.sub_agents.values() if info.parent_uuid == uid]
 
+    def get_parent(self, coder_or_uuid: Any) -> Any:
+        """Return the parent coder for the given coder or UUID.
+
+        If the given coder is the primary coder (``self.coder``), returns itself.
+        Otherwise, looks up the sub-agent's parent in the tracking data and
+        returns that parent's coder instance.
+
+        This is used for lock key resolution when reaping from a sub-agent
+        context — the spawn lock should be acquired with the parent's UUID
+        to properly serialise with concurrent spawn operations under that
+        same parent.
+
+        Args:
+            coder_or_uuid: A coder instance (with ``.uuid``) or a UUID string.
+
+        Returns:
+            The parent coder instance, or ``self.coder`` if the given coder is
+            the primary coder or has no known parent.
+        """
+        if hasattr(coder_or_uuid, "uuid"):
+            uid = str(coder_or_uuid.uuid)
+        else:
+            uid = str(coder_or_uuid)
+
+        # Primary coder returns itself
+        if uid == self.coder.uuid:
+            return self.coder
+
+        # Look up the sub-agent to find its parent_uuid
+        info = self.sub_agents.get(uid)
+        if info and info.parent_uuid:
+            # Parent is the primary coder
+            if info.parent_uuid == self.coder.uuid:
+                return self.coder
+            # Parent is another sub-agent — look up its coder
+            parent_info = self.sub_agents.get(info.parent_uuid)
+            if parent_info:
+                return parent_info.coder
+
+        return self.coder
+
     # ------------------------------------------------------------------ #
     # Foreground agent tracking
     # ------------------------------------------------------------------ #
diff --git a/cecli/tools/_yield.py b/cecli/tools/_yield.py
index 1e6bb12b47c..a95b7343118 100644
--- a/cecli/tools/_yield.py
+++ b/cecli/tools/_yield.py
@@ -40,13 +40,13 @@ async def execute(cls, coder, **kwargs):
 
         This gives the LLM explicit control over when it can stop looping
         """
+        from cecli.helpers.agents.service import AgentService
+
         cls.clear_invocation_cache()
 
         if coder:
             # Check for active child sub-agents and await their tasks before finishing
             try:
-                from cecli.helpers.agents.service import AgentService
-
                 agent_service = AgentService.get_instance(coder)
                 children = agent_service.get_children(coder)
                 active_tasks = [
@@ -113,6 +113,13 @@ async def execute(cls, coder, **kwargs):
             except Exception as e:
                 logger.warning("Error awaiting child sub-agents before yield: %s", e)
 
+            # Reap all finished sub-agents with auto_reap enabled
+            try:
+                service = AgentService.get_instance(coder)
+                await service.reap_all_finished_agents(parent=service.get_parent(coder))
+            except Exception:
+                logger.warning("Failed to reap finished sub-agents", exc_info=True)
+
             coder.agent_finished = True
 
             # If this is a sub-agent, capture the summary for the parent
@@ -120,7 +127,6 @@ async def execute(cls, coder, **kwargs):
             parent_uuid = coder.parent_uuid
             if parent_uuid:
                 try:
-                    from cecli.helpers.agents.service import AgentService
 
                     AgentService.mark_sub_agent_finished(
                         sub_coder_uuid=coder.uuid,
diff --git a/cecli/website/docs/config/subagents.md b/cecli/website/docs/config/subagents.md
index ac223f24830..da0585a527b 100644
--- a/cecli/website/docs/config/subagents.md
+++ b/cecli/website/docs/config/subagents.md
@@ -43,6 +43,7 @@ and suggestions for improvement.
 | `name` | Yes | Unique name used to reference the sub-agent in commands and the Delegate tool |
 | `model` | No | Model override for this sub-agent. If omitted, inherits the parent agent's model |
 | `hooks` | No | Per-agent hooks configuration (see [Hooks](/config/hooks) for syntax) |
+| `auto_reap` | No | Controls whether this sub-agent may be automatically reaped once it has finished or errored. Defaults to `true` if omitted |
 
 #### System Prompt
 
@@ -61,6 +62,8 @@ agent-config:
         - "~/team-agents"     # Custom path for shared agent definitions
 ```
 
+
+
 ## Usage
 
 ### Available Commands
@@ -142,9 +145,27 @@ The `max_sub_agents` setting (default: 3) limits how many concurrent sub-agents
 
 When the limit is reached:
 
-- If any sub-agents have **finished**, the oldest finished one is automatically reaped to make room
+- If any sub-agents have **finished** and have `auto_reap: true` (the default), the oldest finished one is automatically reaped to make room
 - If all sub-agents are still **running**, a `RuntimeError` is raised. You must wait for one to finish or use `/reap-agent` to free resources.
 
+#### Auto-Reap
+
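+The `auto_reap` field in the sub-agent definition's YAML front matter controls whether a finished (or errored) sub-agent may be cleaned up automatically. When `true` (the default), such sub-agents are reaped as agents complete replies or yield, and the oldest one is removed first when the maximum sub-agent limit is reached and room is needed for a new one. A sub-agent is only reaped once its own task and all of its descendants have finished.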
+
+```markdown
+---
+name: reviewer
+model: deepseek/deepseek-v4-pro
+auto_reap: false  # Prevent automatic reaping of this agent
+---
+You are a code review specialist.
+```
+
+- **`/spawn-agent`** always spawns sub-agents with `auto_reap=false` — since these agents are created manually by the user, they should persist until explicitly reaped with `/reap-agent`.
+- **`Delegate` tool** uses the sub-agent's configured `auto_reap` value from its definition. If not set in the `.md` front matter, it defaults to `true`.
+
+Sub-agents with `auto_reap: true` that finish their work (or fail) are candidates for automatic cleanup. Sub-agents with `auto_reap: false` are never automatically reaped and must be cleaned up manually with `/reap-agent`.
+
 ### Cleanup
 
 - **Normal completion**: A sub-agent calls `Yield(summary="...")` which marks it as finished. Its container remains visible but its resources are eligible for lazy cleanup.
diff --git a/tests/subagents/test_commands.py b/tests/subagents/test_commands.py
index c5210736a0b..4cb340c1c84 100644
--- a/tests/subagents/test_commands.py
+++ b/tests/subagents/test_commands.py
@@ -36,7 +36,7 @@ async def test_valid_name_calls_spawn(self):
 
             await SpawnAgentCommand.execute(io, coder, "reviewer")
 
-        mock_instance.spawn.assert_called_once_with("reviewer", None, parent=coder)
+        mock_instance.spawn.assert_called_once_with("reviewer", None, parent=coder, auto_reap=False)
         io.tool_output.assert_called_once()
         assert "spawned" in io.tool_output.call_args[0][0]
 

From 7df28767a0a122d29ccb840be4232b7cec759f21 Mon Sep 17 00:00:00 2001
From: Your Name <you@example.com>
Date: Sun, 31 May 2026 15:44:59 -0400
Subject: [PATCH 24/37] Update tool parsing referencing PR #536 but a bit more
 idiomatically laid out

---
 cecli/coders/agent_coder.py        |  41 ++-
 cecli/coders/base_coder.py         |  14 +
 cecli/helpers/responses.py         | 367 +++++++++++++++++++++++++-
 cecli/tools/delegate.py            |   1 +
 cecli/tools/edit_text.py           |   1 +
 cecli/tools/explore_code.py        |   1 +
 cecli/tools/grep.py                |   1 +
 cecli/tools/read_range.py          |   1 +
 cecli/tools/update_todo_list.py    |   1 +
 cecli/tools/utils/base_tool.py     |   9 +-
 cecli/tools/utils/helpers.py       |  45 ++++
 tests/tools/test_extractions.py    | 406 +++++++++++++++++++++++++++++
 tests/tools/test_tool_arguments.py | 298 +++++++++++++++++++++
 13 files changed, 1156 insertions(+), 30 deletions(-)
 create mode 100644 tests/tools/test_extractions.py
 create mode 100644 tests/tools/test_tool_arguments.py

diff --git a/cecli/coders/agent_coder.py b/cecli/coders/agent_coder.py
index ba5b88b515b..8d77628ce25 100644
--- a/cecli/coders/agent_coder.py
+++ b/cecli/coders/agent_coder.py
@@ -12,7 +12,6 @@
 from datetime import datetime
 from pathlib import Path
 
-from cecli import utils
 from cecli.change_tracker import ChangeTracker
 from cecli.helpers import nested, responses
 from cecli.helpers.agents.service import AgentService
@@ -732,25 +731,23 @@ async def _execute_local_tools(self, tool_calls_list):
                     continue
 
                 if args_string:
-                    json_chunks = utils.split_concatenated_json(args_string)
-                    for chunk in json_chunks:
-                        try:
-                            parsed_args_list.append(json.loads(chunk))
-                        except json.JSONDecodeError as e:
-                            self.model_kwargs = {}
-                            self.io.tool_warning(
-                                f"Malformed JSON arguments in tool {tool_name}: {chunk}"
-                            )
-                            tool_responses.append(
-                                {
-                                    "role": "tool",
-                                    "tool_call_id": tool_call.id,
-                                    "content": (
-                                        f"Malformed JSON arguments in tool {tool_name}: {str(e)}"
-                                    ),
-                                }
-                            )
-                            continue
+                    parsed = responses.parse_tool_arguments(args_string)
+                    if isinstance(parsed, dict) and "@error" in parsed:
+                        self.io.tool_warning(
+                            f"Malformed JSON arguments in tool {tool_name}: {parsed['@error']}"
+                        )
+                        tool_responses.append(
+                            {
+                                "role": "tool",
+                                "tool_call_id": tool_call.id,
+                                "content": (
+                                    f"Malformed JSON arguments in tool {tool_name}: {parsed['@error']}"
+                                ),
+                            }
+                        )
+                        continue
+                    parsed_args_list = [parsed]
+
                 if not parsed_args_list and not args_string:
                     parsed_args_list.append({})
                 all_results_content = []
@@ -846,7 +843,9 @@ async def _execute_mcp_tools(self, server, tool_calls):
         for tool_call in tool_calls:
             # Use existing _execute_mcp_tool logic
             result = await self._execute_mcp_tool(
-                server, tool_call.function.name, json.loads(tool_call.function.arguments)
+                server,
+                tool_call.function.name,
+                responses.parse_tool_arguments(tool_call.function.arguments),
             )
             responses.append(
                 {
diff --git a/cecli/coders/base_coder.py b/cecli/coders/base_coder.py
index ff7a92829b6..b6dc6d6f69f 100755
--- a/cecli/coders/base_coder.py
+++ b/cecli/coders/base_coder.py
@@ -2655,6 +2655,13 @@ def _expand_concatenated_json(self, tool_calls):
                 expanded_tool_calls.append(tool_call)
                 continue
 
+            merged = responses.merge_glued_json_objects(json_chunks)
+            if merged is not None:
+                new_tool_call = copy_tool_call(tool_call)
+                new_tool_call.function.arguments = json.dumps(merged)
+                expanded_tool_calls.append(new_tool_call)
+                continue
+
             # We have concatenated JSON, so expand it into multiple tool calls.
             for i, chunk in enumerate(json_chunks):
                 if not chunk.strip():
@@ -3638,12 +3645,19 @@ def consolidate_chunks(self):
             extracted_calls = responses.extract_tools_from_content_json(
                 self.partial_response_content
             )
+
             if not extracted_calls:
                 extracted_calls = responses.extract_tools_from_content_xml(
                     self.partial_response_content
                 )
 
+            if not extracted_calls:
+                extracted_calls = responses.extract_tools_from_pseudo_json(
+                    self.partial_response_content
+                )
+
             if extracted_calls:
+                self.tool_reflection = True
                 self.partial_response_tool_calls = extracted_calls
 
         self.partial_response_consolidated = (response, func_err, content_err)
diff --git a/cecli/helpers/responses.py b/cecli/helpers/responses.py
index 6f4762e2a88..87d10a2daff 100644
--- a/cecli/helpers/responses.py
+++ b/cecli/helpers/responses.py
@@ -6,6 +6,7 @@
 from litellm.types.utils import ChatCompletionMessageToolCall, Function
 
 from cecli import utils
+from cecli.helpers import nested
 
 
 def preprocess_json(response: str) -> str:
@@ -45,14 +46,21 @@ def extract_tools_from_content_json(content: str) -> Optional[List[ChatCompletio
             chunk_index += 1
             try:
                 json_obj = json.loads(chunk)
-                if isinstance(json_obj, dict) and "name" in json_obj and "arguments" in json_obj:
+                arg_keys = ["arguments", "parameters", "params"]
+
+                if (
+                    isinstance(json_obj, dict)
+                    and "name" in json_obj
+                    and any(key in json_obj for key in arg_keys)
+                ):
                     # Create a Pydantic model for the tool call
+                    json_args = nested.getter(json_obj, arg_keys)
                     function_obj = Function(
                         name=json_obj["name"],
                         arguments=(
-                            json.dumps(json_obj["arguments"])
-                            if isinstance(json_obj["arguments"], (dict, list))
-                            else str(json_obj["arguments"])
+                            json.dumps(json_args)
+                            if isinstance(json_args, (dict, list))
+                            else str(json_args)
                         ),
                     )
                     tool_call_obj = ChatCompletionMessageToolCall(
@@ -63,13 +71,18 @@ def extract_tools_from_content_json(content: str) -> Optional[List[ChatCompletio
                     extracted_calls.append(tool_call_obj)
                 elif isinstance(json_obj, list):
                     for item in json_obj:
-                        if isinstance(item, dict) and "name" in item and "arguments" in item:
+                        if (
+                            isinstance(item, dict)
+                            and "name" in item
+                            and any(key in item for key in arg_keys)
+                        ):
+                            item_args = nested.getter(item, arg_keys)
                             function_obj = Function(
                                 name=item["name"],
                                 arguments=(
-                                    json.dumps(item["arguments"])
-                                    if isinstance(item["arguments"], (dict, list))
-                                    else str(item["arguments"])
+                                    json.dumps(item_args)
+                                    if isinstance(item_args, (dict, list))
+                                    else str(item_args)
                                 ),
                             )
                             tool_call_obj = ChatCompletionMessageToolCall(
@@ -132,6 +145,91 @@ def extract_tools_from_content_xml(content: str) -> Optional[List[ChatCompletion
         return None
 
 
+def extract_tools_from_pseudo_json(content: str) -> Optional[List[ChatCompletionMessageToolCall]]:
+    """
+    Extract tool calls written in bracket format from free-form model output.
+
+    Handles blocks shaped like:
+    [ToolName(arg1=value1, arg2=value2, ...)]
+
+    Where values can be JSON arrays, objects, booleans, strings, or numbers.
+    Returns the calls in order, or None when none are found or scanning fails.
+
+    Example:
+    [Local--ReadRange(show=[{"file_path": "agent.py", "start_text": "class A"}], verbose=true, mode="strict")]
+    """
+    if not content or "[" not in content:
+        return None
+
+    try:
+        extracted_calls = []
+
+        # Scan through content to find all [ToolName(...)] blocks
+        i = 0
+        while i < len(content):
+            bracket_start = content.find("[", i)
+            if bracket_start == -1:
+                break
+
+            # Find the opening paren; without one, no further call can exist
+            paren_start = content.find("(", bracket_start)
+            if paren_start == -1:
+                break
+            # Re-anchor on the "[" nearest the paren: an earlier stray "[" must not hide a call
+            bracket_start = content.rfind("[", bracket_start, paren_start)
+            tool_name = content[bracket_start + 1 : paren_start].strip()
+            if not tool_name or not re.match(r"^[a-zA-Z0-9_-]+$", tool_name):
+                i = paren_start + 1
+                continue
+
+            # Find matching closing paren tracking nesting depth
+            depth = 1
+            paren_end = -1
+            pos = paren_start + 1
+            while pos < len(content) and depth > 0:
+                if content[pos] == "(":
+                    depth += 1
+                elif content[pos] == ")":
+                    depth -= 1
+                    if depth == 0:
+                        paren_end = pos
+                        break
+                pos += 1
+
+            if paren_end == -1:
+                i = paren_start + 1
+                continue
+
+            # Expect "]" after ")"
+            if paren_end + 1 >= len(content) or content[paren_end + 1] != "]":
+                i = paren_end + 1
+                continue
+
+            # Extract the payload between the parentheses
+            payload = content[paren_start + 1 : paren_end]
+
+            # Parse the arguments from the payload
+            args = _parse_bracket_arguments(payload)
+
+            # Create a tool call object
+            function_obj = Function(
+                name=tool_name,
+                arguments=json.dumps(args),
+            )
+            tool_call_obj = ChatCompletionMessageToolCall(
+                type="function",
+                function=function_obj,
+                id=f"bracket_call_{len(extracted_calls)}_{int(time.time())}",
+            )
+            extracted_calls.append(tool_call_obj)
+
+            i = paren_end + 2  # Skip past ")]"
+
+        return extracted_calls if extracted_calls else None
+    except Exception:
+        return None
+
+
 def prefix_tool_name(server_name: str, tool_name: str) -> str:
     """
     Prefix a tool name with the server name.
@@ -243,3 +341,256 @@ def unprefix_tool_call(tool_call):
             result["function"]["name"] = unprefixed_name
 
     return server_name, result
+
+
+def parse_tool_arguments(args_string: str) -> dict:
+    """Parse tool-call arguments to a dict (glued objects merged; ``@error`` key on failure)."""
+    text = (args_string or "").strip()
+    if not text:
+        return {}
+    try:
+        parsed = json.loads(text)
+        if isinstance(parsed, dict):
+            return parsed
+    except json.JSONDecodeError:
+        pass
+    # Strict parsing failed or gave a non-dict: retry with the lenient parser.
+    parsed = try_parse_json_value(text)
+    if isinstance(parsed, dict):
+        return parsed
+    # Still no dict: split the text into chunks and inspect those instead.
+    chunks = utils.split_concatenated_json(text)
+    if len(chunks) <= 1:
+        if not chunks:
+            return {}
+        lone = try_parse_json_value(chunks[0])
+        if isinstance(lone, dict):
+            return lone
+        try:
+            single = json.loads(chunks[0])
+        except json.JSONDecodeError as err:
+            return {"@error": f"Malformed JSON arguments: {err}"}
+        return single if isinstance(single, dict) else {}  # non-dict JSON becomes {}
+    # Several chunks: they are merged only if every one is a JSON object.
+    merged = merge_glued_json_objects(chunks)
+
+    if merged is not None:
+        return merged
+    # Failures are reported in-band under the "@error" key rather than raised.
+    return {
+        "@error": "Could not merge glued JSON objects: argument fragments are not all JSON objects"
+    }
+
+
+def merge_glued_json_objects(chunks: list[str]) -> dict | None:
+    """
+    Fold a run of glued JSON object fragments into a single dict.
+
+    Fragments are applied left to right, so a key repeated in a later fragment
+    overwrites the earlier value; blank and empty-object fragments add nothing.
+    E.g. ``{"limit": 15}{}{"path": "."}`` → ``{"limit": 15, "path": "."}``.
+
+    Returns ``None`` as soon as a fragment fails to parse or is not a JSON
+    object (the caller may then split the chunks instead). When every fragment
+    is blank or ``{}``, the result is an empty dict rather than ``None``.
+    """
+    combined: dict = {}
+    for raw in chunks:
+        fragment = raw.strip()
+        if not fragment:
+            continue
+        value = try_parse_json_value(fragment)
+        if value is None:
+            # The lenient parser gave up; a strict parse decides the outcome.
+            try:
+                value = json.loads(fragment)
+            except json.JSONDecodeError:
+                return None
+        if not isinstance(value, dict):
+            return None
+        # update() with an empty dict is a no-op, so no emptiness check is needed
+        combined.update(value)
+    return combined
+
+
+def try_parse_json_value(text: str):
+    """
+    Parse JSON text, including repairs for common local-model tool-arg quirks.
+
+    Tries, in order: the text as-is, the text after newline repair, and (when
+    it contains ``}{``) each glued fragment, which yields a list of values.
+    Returns ``None`` when nothing parses; a JSON ``null`` also yields ``None``.
+    """
+    text = text.strip()
+    if not text:
+        return None
+    for candidate in (text, _repair_local_model_json_text(text)):
+        try:
+            return json.loads(candidate)
+        except json.JSONDecodeError:
+            continue
+    if "}{" in text:
+        chunks = utils.split_concatenated_json(text)
+        if len(chunks) == 1:
+            try:
+                return json.loads(chunks[0])
+            except json.JSONDecodeError:
+                pass
+        elif len(chunks) > 1:
+            try:
+                return [json.loads(chunk) for chunk in chunks]
+            except json.JSONDecodeError:
+                pass
+
+    # Re-joining the characters of ``text`` would only repeat the parse that
+    # already failed above, so there is no per-character fallback here.
+    return None
+
+
+def try_join_char_split_json_array(items: list) -> list | None:
+    """
+    Rebuild a JSON array that a model emitted one character per list item.
+
+    Example: tasks=["[", "{", "\\"", "t", "a", "s", "k", "\\"", ...] instead of
+    tasks='[{"task": "...", "done": false}]'.
+
+    Returns the decoded list (a decoded object is wrapped in a one-item list),
+    or ``None`` when ``items`` does not look like such a split or fails to parse.
+    """
+    # Cheap rejections first: a split needs at least 8 pieces and must open with
+    # "[" or "{". Ordinary string lists such as ["file_a.py", "file_b.py", ...]
+    # stop here, before any joining work is done.
+    if len(items) < 8 or items[0] not in ("[", "{"):
+        return None
+    if any(not isinstance(piece, str) for piece in items):
+        return None
+
+    candidate = "".join(items).strip()
+    if not candidate.startswith(("[", "{")):
+        return None
+    try:
+        decoded = json.loads(candidate)
+    except json.JSONDecodeError:
+        return None
+
+    if isinstance(decoded, list):
+        return decoded
+    if isinstance(decoded, dict):
+        return [decoded]
+    return None
+
+
+def _repair_local_model_json_text(text: str) -> str:
+    """
+    Patch up empty string values that a local model broke across a newline.
+
+    A value emitted as ``:<newline>",`` or ``:<newline>"}`` (colon, line break,
+    lone quote) is rewritten to ``: "",`` / ``: ""}``; other text is untouched.
+    """
+    for terminator in (",", "}"):
+        text = re.sub(r':\s*\n\s*"' + terminator, ': ""' + terminator, text)
+    return text
+
+
+def _parse_bracket_arguments(payload_str: str) -> dict:
+    """Parse ``key=value`` pairs from the inside of a bracket-style tool call.
+
+    Values are delimited by a depth-aware scan, so commas inside quotes,
+    ``()``, ``{}`` or ``[]`` do not end a value. Each value is decoded with
+    ``json.loads``; anything that fails to decode is kept as the raw text.
+
+    Example: ``show=[...], verbose=true, mode="strict"``
+    """
+    arguments: dict = {}
+    i = 0
+    n = len(payload_str)
+
+    while i < n:
+        # Skip whitespace and inter-argument commas
+        while i < n and payload_str[i] in " ,\t":
+            i += 1
+        if i >= n:
+            break
+
+        # Extract key name (alphanumeric, underscore, hyphen)
+        key_start = i
+        while i < n and (payload_str[i].isalnum() or payload_str[i] in "_-"):
+            i += 1
+        key = payload_str[key_start:i]
+
+        if not key:  # current character cannot start a key: step over it
+            i += 1
+            continue
+
+        # Skip whitespace before '='
+        while i < n and payload_str[i] in " \t":
+            i += 1
+        if i >= n or payload_str[i] != "=":  # key without '=': discard it
+            i += 1
+            continue
+        i += 1  # skip '='
+
+        # Skip whitespace before value
+        while i < n and payload_str[i] in " \t":
+            i += 1
+
+        # Extract value with depth tracking
+        value_start = i
+        depth_paren = 0  # ()
+        depth_brace = 0  # {}
+        depth_bracket = 0  # []
+        in_dquote = False
+        in_squote = False
+
+        while i < n:
+            ch = payload_str[i]
+
+            if in_dquote:
+                if ch == "\\":
+                    i += 2  # skip escaped character
+                    continue
+                if ch == '"':
+                    in_dquote = False
+            elif in_squote:
+                if ch == "\\":
+                    i += 2  # skip escaped character
+                    continue
+                if ch == "'":
+                    in_squote = False
+            else:
+                if ch == '"':
+                    in_dquote = True
+                elif ch == "'":
+                    in_squote = True
+                elif ch == "(":
+                    depth_paren += 1
+                elif ch == ")":
+                    depth_paren -= 1
+                elif ch == "{":
+                    depth_brace += 1
+                elif ch == "}":
+                    depth_brace -= 1
+                elif ch == "[":
+                    depth_bracket += 1
+                elif ch == "]":
+                    depth_bracket -= 1
+                elif ch == ",":
+                    # Comma at depth 0 = next argument separator
+                    if depth_paren == 0 and depth_brace == 0 and depth_bracket == 0:
+                        break
+
+            i += 1
+
+        val_str = payload_str[value_start:i].strip()
+        # The separator comma is outside the slice; strip one only if it still trails
+        if val_str.endswith(","):
+            val_str = val_str[:-1].strip()
+
+        # Try to parse the value as native JSON
+        try:
+            arguments[key] = json.loads(val_str)
+        except (json.JSONDecodeError, ValueError):
+            # Not valid JSON (bare words, single-quoted text): keep the raw string
+            arguments[key] = val_str
+
+    return arguments
diff --git a/cecli/tools/delegate.py b/cecli/tools/delegate.py
index 660db6c276d..1fa6a5313ff 100644
--- a/cecli/tools/delegate.py
+++ b/cecli/tools/delegate.py
@@ -10,6 +10,7 @@
 class Tool(BaseTool):
     NORM_NAME = "delegate"
     TRACK_INVOCATIONS = True
+    LIST_PARAMS = ["delegations"]
     SCHEMA = {
         "type": "function",
         "function": {
diff --git a/cecli/tools/edit_text.py b/cecli/tools/edit_text.py
index df6c8fc8c56..8f5ed549322 100644
--- a/cecli/tools/edit_text.py
+++ b/cecli/tools/edit_text.py
@@ -27,6 +27,7 @@
 class Tool(BaseTool):
     NORM_NAME = "edittext"
     TRACK_INVOCATIONS = False
+    LIST_PARAMS = ["edits"]
     SCHEMA = {
         "type": "function",
         "function": {
diff --git a/cecli/tools/explore_code.py b/cecli/tools/explore_code.py
index dbe12c60377..c95f8df8acd 100644
--- a/cecli/tools/explore_code.py
+++ b/cecli/tools/explore_code.py
@@ -19,6 +19,7 @@
 
 class Tool(BaseTool):
     NORM_NAME = "explorecode"
+    LIST_PARAMS = ["queries"]
     SCHEMA = {
         "type": "function",
         "function": {
diff --git a/cecli/tools/grep.py b/cecli/tools/grep.py
index 03f51d57275..deb9db27d60 100644
--- a/cecli/tools/grep.py
+++ b/cecli/tools/grep.py
@@ -12,6 +12,7 @@
 
 class Tool(BaseTool):
     NORM_NAME = "grep"
+    LIST_PARAMS = ["searches"]
     SCHEMA = {
         "type": "function",
         "function": {
diff --git a/cecli/tools/read_range.py b/cecli/tools/read_range.py
index a9eaab3abfc..fe286b69645 100644
--- a/cecli/tools/read_range.py
+++ b/cecli/tools/read_range.py
@@ -16,6 +16,7 @@
 class Tool(BaseTool):
     NORM_NAME = "readrange"
     TRACK_INVOCATIONS = False
+    LIST_PARAMS = ["show"]
     SCHEMA = {
         "type": "function",
         "function": {
diff --git a/cecli/tools/update_todo_list.py b/cecli/tools/update_todo_list.py
index 223c85256a1..8d9395b22e7 100644
--- a/cecli/tools/update_todo_list.py
+++ b/cecli/tools/update_todo_list.py
@@ -5,6 +5,7 @@
 
 class Tool(BaseTool):
     NORM_NAME = "updatetodolist"
+    LIST_PARAMS = ["tasks"]
     SCHEMA = {
         "type": "function",
         "function": {
diff --git a/cecli/tools/utils/base_tool.py b/cecli/tools/utils/base_tool.py
index fa7e33c5758..2ed174594d2 100644
--- a/cecli/tools/utils/base_tool.py
+++ b/cecli/tools/utils/base_tool.py
@@ -1,6 +1,6 @@
 from abc import ABC, abstractmethod
 
-from cecli.tools.utils.helpers import handle_tool_error
+from cecli.tools.utils.helpers import handle_tool_error, normalize_json_array
 from cecli.tools.utils.output import print_tool_response
 
 
@@ -12,6 +12,9 @@ class BaseTool(ABC):
     NORM_NAME = None
     SCHEMA = None
 
+    # Parameters to run normalization checks on
+    LIST_PARAMS = []
+
     # Invocation tracking for detecting repeated tool calls
     _invocations = {}  # Dict to store last 3 invocations per tool
     _invocation_summary = set()  # Set to track distinct tool names
@@ -119,6 +122,10 @@ def process_response(cls, coder, params):
                         coder, tool_name, ValueError(error_msg), add_traceback=False
                     )
 
+            for param in cls.LIST_PARAMS:
+                if param in params:
+                    params[param] = normalize_json_array(params[param], param_name=param)
+
             # Add current invocation to history (keeping only last 3)
             if params:
                 cls._invocations[tool_name].append((current_params_tuple, params))
diff --git a/cecli/tools/utils/helpers.py b/cecli/tools/utils/helpers.py
index d3c219383bb..f05e2eda8f9 100644
--- a/cecli/tools/utils/helpers.py
+++ b/cecli/tools/utils/helpers.py
@@ -1,8 +1,11 @@
 import difflib
+import json
 import os
 import re
 import traceback
 
+from cecli.helpers import responses
+
 
 class ToolError(Exception):
     """Custom exception for tool-specific errors that should be reported to the LLM."""
@@ -338,6 +341,48 @@ def format_tool_result(
         return result_for_llm
 
 
+def normalize_json_array(value, *, param_name: str = "items", allow_empty: bool = False) -> list:
+    """
+    Coerce a tool argument that should be an array into a ``list``.
+
+    Accepts a list, JSON text, a lone dict, or a per-character string list;
+    raises ``ToolError`` on bad JSON, a non-array, or a disallowed empty array.
+    """
+    if isinstance(value, list):
+        coerced = responses.try_join_char_split_json_array(value)
+        if coerced is not None:
+            value = coerced
+        elif len(value) == 1 and isinstance(value[0], str):
+            # Single element wrapping the whole JSON array/object as a string.
+            if value[0].strip().startswith(("[", "{", '"')):
+                value = value[0]
+    # NOTE(review): a '"'-led element decodes to a str and then fails the array check — intended?
+    if isinstance(value, str):
+        text = value.strip()
+        if not text:
+            if allow_empty:
+                return []
+            raise ToolError(f"{param_name} array cannot be empty")
+        parsed = responses.try_parse_json_value(text)
+        if parsed is None:
+            try:
+                parsed = json.loads(text)
+            except json.JSONDecodeError as err:
+                raise ToolError(f"Invalid {param_name} parameter JSON: {err}") from err
+        value = parsed
+    # A lone object counts as a one-element array.
+    if isinstance(value, dict):
+        value = [value]
+
+    if not isinstance(value, list):
+        raise ToolError(f"{param_name} must be an array, got {type(value).__name__}")
+
+    if len(value) == 0 and not allow_empty:
+        raise ToolError(f"{param_name} array cannot be empty")
+
+    return value
+
+
 # Example usage within a hypothetical tool:
 # try:
 #     abs_path, rel_path, original_content = validate_file_for_edit(coder, file_path)
diff --git a/tests/tools/test_extractions.py b/tests/tools/test_extractions.py
new file mode 100644
index 00000000000..a3b51439202
--- /dev/null
+++ b/tests/tools/test_extractions.py
@@ -0,0 +1,406 @@
+"""Tests for the JSON, XML and pseudo-JSON tool extractors in cecli.helpers.responses."""
+
+import json
+
+from cecli.helpers.responses import (
+    _parse_bracket_arguments,
+    extract_tools_from_content_json,
+    extract_tools_from_content_xml,
+    extract_tools_from_pseudo_json,
+)
+
+# =============================================================================
+# extract_tools_from_content_json
+# =============================================================================
+
+
+def test_json_single_tool_call_with_arguments_key():
+    """Standard tool call with 'arguments' key should be extracted."""
+    content = '{"name": "ls", "arguments": {"path": "."}}'
+    result = extract_tools_from_content_json(content)
+    assert result is not None
+    assert len(result) == 1
+    assert result[0].function.name == "ls"
+    assert json.loads(result[0].function.arguments) == {"path": "."}
+
+
+def test_json_single_tool_call_with_parameters_key():
+    """Tool call with 'parameters' key should be extracted."""
+    content = '{"name": "read_file", "parameters": {"file_path": "/tmp/test.txt"}}'
+    result = extract_tools_from_content_json(content)
+    assert result is not None
+    assert len(result) == 1
+    assert result[0].function.name == "read_file"
+    assert json.loads(result[0].function.arguments) == {"file_path": "/tmp/test.txt"}
+
+
+def test_json_single_tool_call_with_params_key():
+    """Tool call with 'params' key should be extracted."""
+    content = '{"name": "search", "params": {"query": "hello"}}'
+    result = extract_tools_from_content_json(content)
+    assert result is not None
+    assert len(result) == 1
+    assert result[0].function.name == "search"
+    assert json.loads(result[0].function.arguments) == {"query": "hello"}
+
+
+def test_json_array_of_tool_calls():
+    """A JSON array of tool call objects should all be extracted."""
+    content = (
+        "["
+        '{"name": "ls", "arguments": {"path": "."}},'
+        '{"name": "grep", "arguments": {"pattern": "test"}}'
+        "]"
+    )
+    result = extract_tools_from_content_json(content)
+    assert result is not None
+    assert len(result) == 2
+    assert result[0].function.name == "ls"
+    assert result[1].function.name == "grep"
+
+
+def test_json_empty_content_returns_none():
+    """Empty content should return None."""
+    assert extract_tools_from_content_json("") is None
+
+
+def test_json_no_braces_returns_none():
+    """Content without braces or brackets should return None."""
+    assert extract_tools_from_content_json("plain text") is None
+
+
+def test_json_missing_required_keys_returns_none():
+    """JSON without 'name' and arg keys should not be extracted."""
+    content = '{"foo": "bar", "baz": 42}'
+    assert extract_tools_from_content_json(content) is None
+
+
+def test_json_malformed_json_returns_none():
+    """Malformed JSON should return None."""
+    content = '{"name": "ls", "arguments": }'
+    assert extract_tools_from_content_json(content) is None
+
+
+def test_json_with_string_arguments():
+    """Tool call where arguments is a string (not dict/list) should work."""
+    content = '{"name": "echo", "arguments": "hello world"}'
+    result = extract_tools_from_content_json(content)
+    assert result is not None
+    assert len(result) == 1
+    assert result[0].function.name == "echo"
+    # String arguments are passed through as-is rather than re-encoded as JSON
+    assert result[0].function.arguments == "hello world"
+
+
+def test_json_tool_with_nested_arguments():
+    """Tool call with deeply nested arguments should work."""
+    content = (
+        '{"name": "ReadRange", "arguments": {'
+        '"show": [{"file_path": "test.py", "start_text": "hello"}]'
+        "}}"
+    )
+    result = extract_tools_from_content_json(content)
+    assert result is not None
+    assert len(result) == 1
+    assert result[0].function.name == "ReadRange"
+    args = json.loads(result[0].function.arguments)
+    assert args["show"][0]["file_path"] == "test.py"
+
+
+def test_json_multiple_tool_calls_in_list_first_not_valid():
+    """Array where first item is not a valid tool call should skip it."""
+    content = (
+        "[" '{"irrelevant": true},' '{"name": "actual_tool", "arguments": {"key": "value"}}' "]"
+    )
+    result = extract_tools_from_content_json(content)
+    assert result is not None
+    assert len(result) == 1
+    assert result[0].function.name == "actual_tool"
+
+
+def test_json_all_invalid_array_items_returns_none():
+    """Array where ALL items are invalid tool calls should return None."""
+    content = "[" '{"irrelevant": true},' '{"also_invalid": [1, 2, 3]}' "]"
+    result = extract_tools_from_content_json(content)
+    assert result is None
+
+
+# =============================================================================
+# extract_tools_from_content_xml
+# =============================================================================
+
+
+def test_xml_single_tool_call():
+    """Basic XML-style tool call should be extracted."""
+    content = (
+        "<function=UpdateTodoList>"
+        "<parameter=tasks>"
+        '[{"task": "Update task list", "done": false}]'
+        "</parameter>"
+        "</function>"
+    )
+    result = extract_tools_from_content_xml(content)
+    assert result is not None
+    assert len(result) == 1
+    assert result[0].function.name == "UpdateTodoList"
+    args = json.loads(result[0].function.arguments)
+    assert "tasks" in args
+    assert args["tasks"][0]["task"] == "Update task list"
+
+
+def test_xml_multiple_parameters():
+    """Tool call with multiple parameters should work."""
+    content = (
+        "<function=ReadRange>"
+        "<parameter=file_path>"
+        '"test.py"'
+        "</parameter>"
+        "<parameter=start_text>"
+        '"hello"'
+        "</parameter>"
+        "</function>"
+    )
+    result = extract_tools_from_content_xml(content)
+    assert result is not None
+    assert len(result) == 1
+    assert result[0].function.name == "ReadRange"
+    args = json.loads(result[0].function.arguments)
+    assert args["file_path"] == "test.py"
+    assert args["start_text"] == "hello"
+
+
+def test_xml_multiple_tool_calls():
+    """Multiple XML tool calls in content should all be extracted."""
+    content = (
+        "Some text "
+        "<function=ToolA>"
+        "<parameter=arg1>"
+        '"val1"'
+        "</parameter>"
+        "</function>"
+        " more text "
+        "<function=ToolB>"
+        "<parameter=count>42</parameter>"
+        "</function>"
+    )
+    result = extract_tools_from_content_xml(content)
+    assert result is not None
+    assert len(result) == 2
+    assert result[0].function.name == "ToolA"
+    assert result[1].function.name == "ToolB"
+    args_b = json.loads(result[1].function.arguments)
+    assert args_b["count"] == 42
+
+
+def test_xml_empty_content_returns_none():
+    """Empty content should return None."""
+    assert extract_tools_from_content_xml("") is None
+
+
+def test_xml_no_function_tags_returns_none():
+    """Content without <function=...> tags should return None."""
+    assert extract_tools_from_content_xml("plain text") is None
+
+
+def test_xml_parameter_with_array_value():
+    """Parameter with a JSON array value should parse correctly."""
+    content = "<function=BulkTool>" "<parameter=items>" "[1, 2, 3, 4]" "</parameter>" "</function>"
+    result = extract_tools_from_content_xml(content)
+    assert result is not None
+    assert len(result) == 1
+    args = json.loads(result[0].function.arguments)
+    assert args["items"] == [1, 2, 3, 4]
+
+
+def test_xml_parameter_with_string_fallback():
+    """Non-JSON parameter value should fall back to raw string."""
+    content = (
+        "<function=SimpleTool>"
+        "<parameter=note>"
+        "just some plain text"
+        "</parameter>"
+        "</function>"
+    )
+    result = extract_tools_from_content_xml(content)
+    assert result is not None
+    assert len(result) == 1
+    args = json.loads(result[0].function.arguments)
+    assert args["note"] == "just some plain text"
+
+
+def test_xml_nested_in_text():
+    """XML tool call embedded in surrounding text should be extracted."""
+    content = (
+        "I will use the UpdateTodoList tool.\n"
+        "<function=UpdateTodoList>"
+        "<parameter=tasks>"
+        '[{"task": "test", "done": false}]'
+        "</parameter>"
+        "</function>"
+        "\nThat should update the list."
+    )
+    result = extract_tools_from_content_xml(content)
+    assert result is not None
+    assert len(result) == 1
+    assert result[0].function.name == "UpdateTodoList"
+
+
+# =============================================================================
+# extract_tools_from_pseudo_json
+# =============================================================================
+
+
+def test_pseudo_single_tool_with_array_arg():
+    """Bracket format with a JSON array argument should be extracted."""
+    content = '[Local--ReadRange(show=[{"file_path": "test.py", ' '"start_text": "def foo"}])]'
+    result = extract_tools_from_pseudo_json(content)
+    assert result is not None
+    assert len(result) == 1
+    assert result[0].function.name == "Local--ReadRange"
+    args = json.loads(result[0].function.arguments)
+    assert args["show"][0]["file_path"] == "test.py"
+
+
+def test_pseudo_multiple_args_with_different_types():
+    """Multiple args with boolean, string, and array values."""
+    content = (
+        '[Local--ReadRange(show=[{"file_path": "test.py", '
+        '"start_text": "class A"}], verbose=true, mode="strict")]'
+    )
+    result = extract_tools_from_pseudo_json(content)
+    assert result is not None
+    assert len(result) == 1
+    assert result[0].function.name == "Local--ReadRange"
+    args = json.loads(result[0].function.arguments)
+    assert args["verbose"] is True
+    assert args["mode"] == "strict"
+    assert len(args["show"]) == 1
+
+
+def test_pseudo_multiple_tool_blocks():
+    """Multiple bracket tool blocks in content should all be extracted."""
+    content = 'First [ToolA(arg1="val1")] and ' "then [ToolB(count=42, flag=true)]"
+    result = extract_tools_from_pseudo_json(content)
+    assert result is not None
+    assert len(result) == 2
+    assert result[0].function.name == "ToolA"
+    assert result[1].function.name == "ToolB"
+    args_b = json.loads(result[1].function.arguments)
+    assert args_b["count"] == 42
+    assert args_b["flag"] is True
+
+
+def test_pseudo_empty_content_returns_none():
+    """Empty content should return None."""
+    assert extract_tools_from_pseudo_json("") is None
+
+
+def test_pseudo_no_brackets_returns_none():
+    """Content without brackets should return None."""
+    assert extract_tools_from_pseudo_json("plain text") is None
+
+
+def test_pseudo_nested_parentheses():
+    """Values with nested parentheses should be handled correctly."""
+    content = '[DeepNest(calc="((1+2)*3)", name="test")]'
+    result = extract_tools_from_pseudo_json(content)
+    assert result is not None
+    assert len(result) == 1
+    assert result[0].function.name == "DeepNest"
+    args = json.loads(result[0].function.arguments)
+    assert args["calc"] == "((1+2)*3)"
+    assert args["name"] == "test"
+
+
+def test_pseudo_incomplete_bracket_no_match():
+    """Missing closing bracket should not be extracted."""
+    content = '[ToolA(arg1="val1"'
+    assert extract_tools_from_pseudo_json(content) is None
+
+
+def test_pseudo_missing_closing_paren():
+    """A closed call that lacks its closing bracket should not be extracted."""
+    content = '[ToolA(arg1="val1") more text'
+    assert extract_tools_from_pseudo_json(content) is None
+
+
+def test_pseudo_tool_in_surrounding_text():
+    """Bracket tool call embedded in text should be extracted."""
+    content = (
+        "I will use the Local--ReadRange tool:\n"
+        '[Local--ReadRange(show=[{"file_path": "test.py"}])]'
+        "\nThat should read the file."
+    )
+    result = extract_tools_from_pseudo_json(content)
+    assert result is not None
+    assert len(result) == 1
+    assert result[0].function.name == "Local--ReadRange"
+
+
+def test_pseudo_numeric_and_null_values():
+    """Numeric and null values should parse correctly."""
+    content = "[MathTool(x=42, y=3.14, optional=null)]"
+    result = extract_tools_from_pseudo_json(content)
+    assert result is not None
+    assert len(result) == 1
+    args = json.loads(result[0].function.arguments)
+    assert args["x"] == 42
+    assert args["y"] == 3.14
+    assert args["optional"] is None
+
+
+# =============================================================================
+# _parse_bracket_arguments (helper)
+# =============================================================================
+
+
+def test_parse_bracket_arguments_single():
+    """Single argument should parse correctly."""
+    payload = 'show=[{"file_path": "test.py"}]'
+    result = _parse_bracket_arguments(payload)
+    assert result["show"] == [{"file_path": "test.py"}]
+
+
+def test_parse_bracket_arguments_multiple():
+    """Multiple arguments of mixed types should parse correctly."""
+    payload = 'show=[{"file_path": "test.py"}], verbose=true, mode="strict"'
+    result = _parse_bracket_arguments(payload)
+    assert len(result) == 3
+    assert result["verbose"] is True
+    assert result["mode"] == "strict"
+    assert result["show"] == [{"file_path": "test.py"}]
+
+
+def test_parse_bracket_arguments_empty():
+    """Empty payload should return empty dict."""
+    assert _parse_bracket_arguments("") == {}
+
+
+def test_parse_bracket_arguments_numbers_and_null():
+    """Numbers and null values should be parsed correctly."""
+    payload = "x=42, y=3.14, z=null"
+    result = _parse_bracket_arguments(payload)
+    assert result["x"] == 42
+    assert result["y"] == 3.14
+    assert result["z"] is None
+
+
+def test_parse_bracket_arguments_escaped_quotes():
+    """Values with escaped double quotes inside strings should parse correctly."""
+    payload = 'arg="value with \\"escaped quote\\" inside"'
+    result = _parse_bracket_arguments(payload)
+    assert result["arg"] == 'value with "escaped quote" inside'
+
+
+def test_parse_bracket_arguments_escaped_backslash():
+    """Values with escaped backslashes inside strings should parse correctly."""
+    payload = 'path="C:\\\\Users\\\\test"'
+    result = _parse_bracket_arguments(payload)
+    assert result["path"] == "C:\\Users\\test"
+
+
+def test_parse_bracket_arguments_single_quoted_string():
+    """Single-quoted string values retain their quotes (JSON doesn't parse single quotes)."""
+    payload = "name='hello world'"
+    result = _parse_bracket_arguments(payload)
+    assert result["name"] == "'hello world'"
diff --git a/tests/tools/test_tool_arguments.py b/tests/tools/test_tool_arguments.py
new file mode 100644
index 00000000000..c06ff1d5d83
--- /dev/null
+++ b/tests/tools/test_tool_arguments.py
@@ -0,0 +1,298 @@
+"""Tests for parsing glued or malformed tool-call JSON arguments from local models."""
+
+import json
+from types import SimpleNamespace
+from unittest.mock import Mock
+
+import pytest
+
+from cecli.coders.base_coder import Coder
+from cecli.helpers.responses import (
+    _repair_local_model_json_text,
+    extract_tools_from_content_json,
+    merge_glued_json_objects,
+    parse_tool_arguments,
+    try_join_char_split_json_array,
+    try_parse_json_value,
+)
+from cecli.tools.grep import Tool as GrepTool
+from cecli.tools.utils.helpers import ToolError, normalize_json_array
+
+
+def test_parse_tool_arguments_merges_glued_objects_with_empty_fragments():
+    raw = '{"limit": 15}{}{"path": "."}'
+    assert parse_tool_arguments(raw) == {"limit": 15, "path": "."}
+
+
+def test_parse_tool_arguments_merges_grep_style_glued_args():
+    raw = (
+        '{"limit": 15}{}{"searches": [{"file_pattern": "*.md", '
+        '"pattern": "TODO|FIXME", "use_regex": true}]}'
+    )
+    out = parse_tool_arguments(raw)
+    assert out["limit"] == 15
+    assert out["searches"][0]["pattern"] == "TODO|FIXME"
+
+
+def test_merge_glued_returns_none_for_non_object_chunks():
+    assert merge_glued_json_objects(['["a"]', '{"b": 1}']) is None
+
+
+def test_merge_glued_all_empty_chunks_returns_dict():
+    """All-empty chunks should return an empty dict (no non-empty content to merge)."""
+    result = merge_glued_json_objects(["{}", "{}"])
+    assert result is not None
+    assert result == {}
+
+
+def test_merge_glued_single_empty_chunk_returns_dict():
+    """A single empty object chunk should return an empty dict."""
+    result = merge_glued_json_objects(["{}"])
+    assert result is not None
+    assert result == {}
+
+
+def test_merge_glued_empty_string_chunks_returns_empty_dict():
+    """Chunks that are empty strings are skipped, returning an empty merged dict."""
+    result = merge_glued_json_objects(["", "", ""])
+    # Empty strings are stripped to empty and skipped, leaving merged == {} -> returns {}
+    assert result == {}
+
+
+def test_expand_concatenated_json_merges_instead_of_splitting(monkeypatch):
+    """Dogfood: DeepSeek ``{…}{}{…}`` must not become three tool calls."""
+
+    class MiniCoder(Coder):
+        def __init__(self):
+            pass
+
+    coder = MiniCoder.__new__(MiniCoder)
+    tool_call = SimpleNamespace(
+        id="call-1",
+        function=SimpleNamespace(
+            name="ls",
+            arguments='{"limit": 15}{}{"path": "."}',
+        ),
+    )
+    expanded = coder._expand_concatenated_json([tool_call])
+    assert len(expanded) == 1
+    assert json.loads(expanded[0].function.arguments) == {"limit": 15, "path": "."}
+    assert expanded[0].id == "call-1"
+
+
+def test_grep_format_output_empty_searches_does_not_crash_tool_footer():
+    coder = SimpleNamespace(
+        io=SimpleNamespace(tool_error=Mock(), tool_output=Mock(), tool_warning=Mock()),
+        verbose=False,
+        pretty=False,
+        tui=lambda: None,
+    )
+    tool_response = SimpleNamespace(
+        function=SimpleNamespace(
+            name="Grep",
+            arguments='{"limit": 15}{}{"searches": []}',
+        ),
+    )
+    GrepTool.format_output(
+        coder,
+        mcp_server=SimpleNamespace(name="Local"),
+        tool_response=tool_response,
+    )
+    assert coder.io.tool_error.called
+
+
+def test_try_join_char_split_json_array_reconstructs_array():
+    """Char-split JSON array should be joined back into a proper list."""
+    items = ["[", "{", '"', "t", "a", "s", "k", '"', ":", " ", '"', "x", '"', "}", "]"]
+    result = try_join_char_split_json_array(items)
+    assert result == [{"task": "x"}]
+
+
+def test_try_join_char_split_json_array_reconstructs_dict():
+    """Char-split JSON object should be joined and wrapped in a list."""
+    items = ["{", '"', "a", '"', ":", " ", "1", "}"]
+    result = try_join_char_split_json_array(items)
+    assert result == [{"a": 1}]
+
+
+def test_try_join_char_split_json_array_too_few_items():
+    """Fewer than 8 items should return None."""
+    assert try_join_char_split_json_array(["{", "}"]) is None
+
+
+def test_try_join_char_split_json_array_non_string_items():
+    """Non-string items should return None."""
+    assert try_join_char_split_json_array(["[", 1, "{", "}"]) is None
+
+
+def test_try_join_char_split_json_array_does_not_start_with_brace():
+    """Joined string not starting with [ or { should return None."""
+    items = list('"hello"')  # A JSON string, not object/array
+    assert try_join_char_split_json_array(items) is None
+
+
+def test_try_join_char_split_json_array_invalid_json():
+    """Joined string that is invalid JSON should return None."""
+    items = ["{", '"', "a", '"', ":", " ", "b", "r", "o", "k", "e", "n", "}"]
+    assert try_join_char_split_json_array(items) is None
+
+
+def test_repair_local_model_json_text_newline_before_quote():
+    """Repair colon-newline-quote breakage inside JSON."""
+    broken = '"end_text":\n",'
+    fixed = _repair_local_model_json_text(broken)
+    assert '"end_text": "",' in fixed
+
+
+def test_repair_local_model_json_text_newline_before_closing_brace():
+    """Repair colon-newline-quote before closing brace."""
+    broken = '"end_text":\n"}'
+    fixed = _repair_local_model_json_text(broken)
+    assert '"end_text": ""}' in fixed
+
+
+def test_repair_local_model_json_text_valid_json_passes_through():
+    """Valid JSON text should remain unchanged."""
+    valid = '{"limit": 15, "path": "."}'
+    assert _repair_local_model_json_text(valid) == valid
+
+
+def test_normalize_json_array_actual_list_passthrough():
+    """A proper list should pass through unchanged."""
+    data = [{"pattern": "foo"}, {"pattern": "bar"}]
+    result = normalize_json_array(data, param_name="searches")
+    assert result == data
+
+
+def test_normalize_json_array_char_split_input():
+    """Char-split list should be joined back into a proper array."""
+    items = ["[", "{", '"', "t", "a", "s", "k", '"', ":", " ", '"', "x", '"', "}", "]"]
+    result = normalize_json_array(items, param_name="delegations")
+    assert result == [{"task": "x"}]
+
+
+def test_normalize_json_array_json_string_wrapping_array():
+    """A JSON string containing an array should be parsed."""
+    result = normalize_json_array('[{"a": 1}, {"b": 2}]', param_name="edits")
+    assert result == [{"a": 1}, {"b": 2}]
+
+
+def test_normalize_json_array_json_string_wrapping_dict():
+    """A JSON string containing a dict should be wrapped in a list."""
+    result = normalize_json_array('{"task": "hello"}', param_name="tasks")
+    assert result == [{"task": "hello"}]
+
+
+def test_normalize_json_array_empty_string_raises_tool_error():
+    """An empty string should raise ToolError."""
+    with pytest.raises(ToolError, match="array cannot be empty"):
+        normalize_json_array("", param_name="items")
+
+
+def test_normalize_json_array_empty_string_with_allow_empty():
+    """An empty string with allow_empty=True should return empty list."""
+    assert normalize_json_array("", param_name="items", allow_empty=True) == []
+
+
+def test_normalize_json_array_invalid_json_string_raises_tool_error():
+    """An invalid JSON string should raise ToolError."""
+    with pytest.raises(ToolError, match="Invalid.*parameter JSON"):
+        normalize_json_array("{broken", param_name="items")
+
+
+def test_normalize_json_array_dict_input_wraps_in_list():
+    """A bare dict should be wrapped in a list."""
+    result = normalize_json_array({"task": "hello"}, param_name="tasks")
+    assert result == [{"task": "hello"}]
+
+
+def test_normalize_json_array_non_list_non_dict_raises():
+    """A non-list, non-dict, non-string value should raise ToolError."""
+    with pytest.raises(ToolError, match="must be an array"):
+        normalize_json_array(42, param_name="items")
+
+
+def test_normalize_json_array_empty_list_without_allow_empty_raises():
+    """An empty list without allow_empty should raise ToolError."""
+    with pytest.raises(ToolError, match="array cannot be empty"):
+        normalize_json_array([], param_name="items")
+
+
+def test_normalize_json_array_empty_list_with_allow_empty():
+    """An empty list with allow_empty=True should pass through."""
+    assert normalize_json_array([], param_name="items", allow_empty=True) == []
+
+
+def test_extract_tools_from_content_json_with_arguments_key():
+    """Standard tool calls with 'arguments' key should be extracted."""
+    content = '{"name": "ls", "arguments": {"path": "."}}'
+    result = extract_tools_from_content_json(content)
+    assert result is not None
+    assert len(result) == 1
+    assert result[0].function.name == "ls"
+    assert json.loads(result[0].function.arguments) == {"path": "."}
+
+
+def test_extract_tools_from_content_json_with_parameters_key():
+    """Tool calls with 'parameters' key should be extracted."""
+    content = '{"name": "ls", "parameters": {"path": "."}}'
+    result = extract_tools_from_content_json(content)
+    assert result is not None
+    assert len(result) == 1
+    assert result[0].function.name == "ls"
+    assert json.loads(result[0].function.arguments) == {"path": "."}
+
+
+def test_extract_tools_from_content_json_with_params_key():
+    """Tool calls with 'params' key should be extracted."""
+    content = '{"name": "ls", "params": {"path": "."}}'
+    result = extract_tools_from_content_json(content)
+    assert result is not None
+    assert len(result) == 1
+    assert result[0].function.name == "ls"
+    assert json.loads(result[0].function.arguments) == {"path": "."}
+
+
+def test_extract_tools_from_content_json_array_with_parameters():
+    """Array of tool calls with 'parameters' key should be extracted."""
+    content = (
+        '[{"name": "ls", "parameters": {"path": "."}},'
+        ' {"name": "grep", "parameters": {"pattern": "foo"}}]'
+    )
+    result = extract_tools_from_content_json(content)
+    assert result is not None
+    assert len(result) == 2
+    assert result[0].function.name == "ls"
+    assert result[1].function.name == "grep"
+    assert json.loads(result[1].function.arguments) == {"pattern": "foo"}
+
+
+def test_parse_tool_arguments_truly_unparseable():
+    """Truly unparsable JSON should return a dict with an ``@error`` message."""
+    inp = "this is not json at all!!!"
+    result = parse_tool_arguments(inp)
+    assert "@error" in result
+    assert "Malformed JSON arguments" in result["@error"]
+
+
+def test_parse_tool_arguments_empty_string():
+    """Empty, whitespace-only, or None input should return an empty dict."""
+    assert parse_tool_arguments("") == {}
+    assert parse_tool_arguments("   ") == {}
+    assert parse_tool_arguments(None) == {}
+
+
+def test_try_parse_json_value_empty_text():
+    """Empty text should return None."""
+    assert try_parse_json_value("") is None
+    assert try_parse_json_value("   ") is None
+
+
+def test_parse_tool_arguments_uneven_glued_objects_with_list():
+    """Glued objects where one chunk is a list should not merge (fallback)."""
+    # The list chunk makes the merge fail, so an "@error" dict is returned rather than {}
+    inp = '{"a": 1}{"b": 2}["c"]'
+    result = parse_tool_arguments(inp)
+    # The function tries to parse, failing on the mixed glued content
+    assert "@error" in result
+    assert "Could not merge glued JSON objects" in result["@error"]

From cb8b714d52537759ed7391d6b61447cbb4aeecad Mon Sep 17 00:00:00 2001
From: Your Name <you@example.com>
Date: Sun, 31 May 2026 15:54:35 -0400
Subject: [PATCH 25/37] Skills should be loadable by subagents

---
 cecli/commands/exclude_skill.py |  2 +-
 cecli/commands/include_skill.py |  2 +-
 cecli/commands/load_skill.py    |  3 ++-
 cecli/commands/remove_skill.py  |  2 +-
 cecli/helpers/skills.py         | 20 ++++++++++++++++----
 cecli/tools/load_skill.py       |  2 +-
 cecli/tools/remove_skill.py     |  2 +-
 7 files changed, 23 insertions(+), 10 deletions(-)

diff --git a/cecli/commands/exclude_skill.py b/cecli/commands/exclude_skill.py
index 086e967678a..b50ceb00044 100644
--- a/cecli/commands/exclude_skill.py
+++ b/cecli/commands/exclude_skill.py
@@ -19,7 +19,7 @@ async def execute(cls, io, coder, args, **kwargs):
         skill_names = args.strip().split()
 
         # Check if we're in agent mode
-        if not hasattr(coder, "edit_format") or coder.edit_format != "agent":
+        if not hasattr(coder, "edit_format") or coder.edit_format not in ("agent", "subagent"):
             io.tool_output("Skill exclusion is only available in agent mode.")
             return format_command_result(
                 io, "exclude-skill", "Skill exclusion is only available in agent mode"
diff --git a/cecli/commands/include_skill.py b/cecli/commands/include_skill.py
index 754ccdf2dd2..6fa39ac3732 100644
--- a/cecli/commands/include_skill.py
+++ b/cecli/commands/include_skill.py
@@ -19,7 +19,7 @@ async def execute(cls, io, coder, args, **kwargs):
         skill_names = args.strip().split()
 
         # Check if we're in agent mode
-        if not hasattr(coder, "edit_format") or coder.edit_format != "agent":
+        if not hasattr(coder, "edit_format") or coder.edit_format not in ("agent", "subagent"):
             io.tool_output("Skill inclusion is only available in agent mode.")
             return format_command_result(
                 io, "include-skill", "Skill inclusion is only available in agent mode"
diff --git a/cecli/commands/load_skill.py b/cecli/commands/load_skill.py
index 33056214f4a..328f56653f9 100644
--- a/cecli/commands/load_skill.py
+++ b/cecli/commands/load_skill.py
@@ -19,7 +19,8 @@ async def execute(cls, io, coder, args, **kwargs):
         skill_names = args.strip().split()
 
         # Check if we're in agent mode
-        if not hasattr(coder, "edit_format") or coder.edit_format != "agent":
+        io.tool_output(coder.edit_format)
+        if not hasattr(coder, "edit_format") or coder.edit_format not in ("agent", "subagent"):
             io.tool_output("Skill loading is only available in agent mode.")
             return format_command_result(
                 io, "load-skill", "Skill loading is only available in agent mode"
diff --git a/cecli/commands/remove_skill.py b/cecli/commands/remove_skill.py
index 35afe8f5e42..81c684f7dd3 100644
--- a/cecli/commands/remove_skill.py
+++ b/cecli/commands/remove_skill.py
@@ -19,7 +19,7 @@ async def execute(cls, io, coder, args, **kwargs):
         skill_names = args.strip().split()
 
         # Check if we're in agent mode
-        if not hasattr(coder, "edit_format") or coder.edit_format != "agent":
+        if not hasattr(coder, "edit_format") or coder.edit_format not in ("agent", "subagent"):
             io.tool_output("Skill removal is only available in agent mode.")
             return format_command_result(
                 io, "remove-skill", "Skill removal is only available in agent mode"
diff --git a/cecli/helpers/skills.py b/cecli/helpers/skills.py
index bfb0f469a0c..40bf2a3bd45 100644
--- a/cecli/helpers/skills.py
+++ b/cecli/helpers/skills.py
@@ -449,7 +449,10 @@ def load_skill(self, skill_name: str) -> str:
             return "Error: Skills manager not connected to a coder instance."
 
         # Check if we're in agent mode
-        if not hasattr(self.coder, "edit_format") or self.coder.edit_format != "agent":
+        if not hasattr(self.coder, "edit_format") or self.coder.edit_format not in (
+            "agent",
+            "subagent",
+        ):
             return "Error: Skill loading is only available in agent mode."
 
         # Check if skill is already loaded
@@ -498,7 +501,10 @@ def remove_skill(self, skill_name: str) -> str:
             return "Error: Skills manager not connected to a coder instance."
 
         # Check if we're in agent mode
-        if not hasattr(self.coder, "edit_format") or self.coder.edit_format != "agent":
+        if not hasattr(self.coder, "edit_format") or self.coder.edit_format not in (
+            "agent",
+            "subagent",
+        ):
             return "Error: Skill removal is only available in agent mode."
 
         # Check if skill is already removed
@@ -532,7 +538,10 @@ def include_skill(self, skill_name: str) -> str:
             return "Error: Skills manager not connected to a coder instance."
 
         # Check if we're in agent mode
-        if not hasattr(self.coder, "edit_format") or self.coder.edit_format != "agent":
+        if not hasattr(self.coder, "edit_format") or self.coder.edit_format not in (
+            "agent",
+            "subagent",
+        ):
             return "Error: Skill inclusion is only available in agent mode."
 
         # Find the skill to verify it exists
@@ -590,7 +599,10 @@ def exclude_skill(self, skill_name: str) -> str:
             return "Error: Skills manager not connected to a coder instance."
 
         # Check if we're in agent mode
-        if not hasattr(self.coder, "edit_format") or self.coder.edit_format != "agent":
+        if not hasattr(self.coder, "edit_format") or self.coder.edit_format not in (
+            "agent",
+            "subagent",
+        ):
             return "Error: Skill exclusion is only available in agent mode."
 
         # Find the skill to verify it exists
diff --git a/cecli/tools/load_skill.py b/cecli/tools/load_skill.py
index 15f620579fb..f59beea940f 100644
--- a/cecli/tools/load_skill.py
+++ b/cecli/tools/load_skill.py
@@ -30,7 +30,7 @@ def execute(cls, coder, skill_name, **kwargs):
             return "Error: Skill name is required."
 
         # Check if we're in agent mode
-        if not hasattr(coder, "edit_format") or coder.edit_format != "agent":
+        if not hasattr(coder, "edit_format") or coder.edit_format not in ("agent", "subagent"):
             return "Error: Skill loading is only available in agent mode."
 
         # Check if skills_manager is available
diff --git a/cecli/tools/remove_skill.py b/cecli/tools/remove_skill.py
index 70afb02ebfd..a4e0e72eed9 100644
--- a/cecli/tools/remove_skill.py
+++ b/cecli/tools/remove_skill.py
@@ -30,7 +30,7 @@ def execute(cls, coder, skill_name, **kwargs):
             return "Error: Skill name is required."
 
         # Check if we're in agent mode
-        if not hasattr(coder, "edit_format") or coder.edit_format != "agent":
+        if not hasattr(coder, "edit_format") or coder.edit_format not in ("agent", "subagent"):
             return "Error: Skill removal is only available in agent mode."
 
         # Check if skills_manager is available

From 7f8296edd2e4aa46eab14e3b70e6a690a2d7552e Mon Sep 17 00:00:00 2001
From: Your Name <you@example.com>
Date: Sun, 31 May 2026 15:58:50 -0400
Subject: [PATCH 26/37] Switch to newly spawned agent on creation

---
 cecli/commands/spawn_agent.py | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/cecli/commands/spawn_agent.py b/cecli/commands/spawn_agent.py
index de77578713f..6b1c185d17d 100644
--- a/cecli/commands/spawn_agent.py
+++ b/cecli/commands/spawn_agent.py
@@ -27,11 +27,21 @@ async def execute(cls, io, coder, args, **kwargs):
 
         try:
             agent_service = AgentService.get_instance(coder)
-            await agent_service.spawn(name, prompt, parent=coder, auto_reap=False)
+            new_coder, info = await agent_service.spawn(name, prompt, parent=coder, auto_reap=False)
+
+            # Set the newly spawned agent as the foreground agent
+            agent_service.foreground_uuid = info.coder.uuid
 
             if coder.tui and coder.tui():
-                switch_key = coder.tui().get_keys_for("next_agent")
-                io.tool_output(f"Sub-agent '{name}' spawned. " f"Switch to it with {switch_key}")
+                tui = coder.tui()
+                switch_key = tui.get_keys_for("next_agent")
+                io.tool_output(f"Sub-agent '{name}' spawned and active. Switch with {switch_key}")
+
+                # Switch TUI display to the new sub-agent's container
+                try:
+                    tui.call_from_thread(tui._switch_to_container, info.coder.uuid)
+                except Exception:
+                    pass
         except ValueError as e:
             io.tool_error(f"Error: {e}")
         except RuntimeError as e:

From d84adf66d0c6d583ed15bef9b5be8acd4a237306 Mon Sep 17 00:00:00 2001
From: Your Name <you@example.com>
Date: Sun, 31 May 2026 17:51:34 -0400
Subject: [PATCH 27/37] Update model-metadata, disable default litellm metadata
 fetch on startup, bump version

---
 cecli/__init__.py                   |    2 +-
 cecli/main.py                       |    2 +-
 cecli/resources/model-metadata.json | 1801 ++++++++++++++++++++++-----
 3 files changed, 1521 insertions(+), 284 deletions(-)

diff --git a/cecli/__init__.py b/cecli/__init__.py
index 32ad82fb67b..6a368cff4c6 100644
--- a/cecli/__init__.py
+++ b/cecli/__init__.py
@@ -1,6 +1,6 @@
 from packaging import version
 
-__version__ = "0.99.12.dev"
+__version__ = "0.100.2.dev"
 safe_version = __version__
 
 try:
diff --git a/cecli/main.py b/cecli/main.py
index 69892725e38..17b96a8f8af 100644
--- a/cecli/main.py
+++ b/cecli/main.py
@@ -584,7 +584,7 @@ async def main_async(argv=None, input=None, output=None, force_git_root=None, re
     if not args.verify_ssl:
         import httpx
 
-        os.environ["SSL_VERIFY"] = ""
+        os.environ["LITELLM_LOCAL_MODEL_COST"] = "true"
         litellm._load_litellm()
         litellm._lazy_module.client_session = httpx.Client(verify=False)
         litellm._lazy_module.aclient_session = httpx.AsyncClient(verify=False)
diff --git a/cecli/resources/model-metadata.json b/cecli/resources/model-metadata.json
index 2413e5407f4..8aee168a3e5 100644
--- a/cecli/resources/model-metadata.json
+++ b/cecli/resources/model-metadata.json
@@ -374,7 +374,6 @@
     "supports_response_schema": true,
     "supports_tool_choice": true,
     "supports_vision": true,
-    "tool_use_system_prompt_tokens": 346,
     "supports_native_structured_output": true
   },
   "anthropic.claude-haiku-4-5@20251001": {
@@ -398,7 +397,6 @@
     "supports_response_schema": true,
     "supports_tool_choice": true,
     "supports_vision": true,
-    "tool_use_system_prompt_tokens": 346,
     "supports_native_streaming": true,
     "supports_native_structured_output": true
   },
@@ -424,8 +422,8 @@
     "supports_vision": true,
     "supports_prompt_caching": false,
     "supports_reasoning": true,
-    "supports_minimal_reasoning_effort": true,
-    "supports_tool_choice": true
+    "supports_tool_choice": true,
+    "supports_output_config": true
   },
   "anthropic.claude-opus-4-1-20250805-v1:0": {
     "cache_creation_input_token_cost": 0.00001875,
@@ -450,8 +448,7 @@
     "supports_reasoning": true,
     "supports_response_schema": true,
     "supports_tool_choice": true,
-    "supports_vision": true,
-    "tool_use_system_prompt_tokens": 159
+    "supports_vision": true
   },
   "anthropic.claude-opus-4-20250514-v1:0": {
     "cache_creation_input_token_cost": 0.00001875,
@@ -476,8 +473,7 @@
     "supports_reasoning": true,
     "supports_response_schema": true,
     "supports_tool_choice": true,
-    "supports_vision": true,
-    "tool_use_system_prompt_tokens": 159
+    "supports_vision": true
   },
   "anthropic.claude-opus-4-5-20251101-v1:0": {
     "cache_creation_input_token_cost": 0.00000625,
@@ -501,12 +497,12 @@
     "supports_pdf_input": true,
     "supports_prompt_caching": true,
     "supports_reasoning": true,
-    "supports_minimal_reasoning_effort": true,
     "supports_response_schema": true,
     "supports_tool_choice": true,
     "supports_vision": true,
-    "tool_use_system_prompt_tokens": 159,
-    "supports_native_structured_output": true
+    "supports_native_structured_output": true,
+    "supports_output_config": true,
+    "bedrock_output_config_effort_ceiling": "high"
   },
   "anthropic.claude-opus-4-6-v1": {
     "cache_creation_input_token_cost": 0.00000625,
@@ -533,10 +529,10 @@
     "supports_response_schema": true,
     "supports_tool_choice": true,
     "supports_vision": true,
-    "tool_use_system_prompt_tokens": 346,
     "supports_native_structured_output": true,
+    "supports_output_config": true,
     "supports_max_reasoning_effort": true,
-    "supports_minimal_reasoning_effort": true
+    "bedrock_output_config_effort_ceiling": "max"
   },
   "anthropic.claude-opus-4-7": {
     "cache_creation_input_token_cost": 0.00000625,
@@ -564,10 +560,41 @@
     "supports_tool_choice": true,
     "supports_vision": true,
     "supports_xhigh_reasoning_effort": true,
-    "tool_use_system_prompt_tokens": 346,
     "supports_native_structured_output": true,
     "supports_max_reasoning_effort": true,
-    "supports_minimal_reasoning_effort": true
+    "supports_output_config": true,
+    "bedrock_output_config_effort_ceiling": "xhigh"
+  },
+  "anthropic.claude-opus-4-8": {
+    "cache_creation_input_token_cost": 0.00000625,
+    "cache_creation_input_token_cost_above_1hr": 0.00001,
+    "cache_read_input_token_cost": 5e-7,
+    "input_cost_per_token": 0.000005,
+    "litellm_provider": "bedrock_converse",
+    "max_input_tokens": 1000000,
+    "max_output_tokens": 128000,
+    "max_tokens": 128000,
+    "mode": "chat",
+    "output_cost_per_token": 0.000025,
+    "search_context_cost_per_query": {
+      "search_context_size_high": 0.01,
+      "search_context_size_low": 0.01,
+      "search_context_size_medium": 0.01
+    },
+    "supports_assistant_prefill": false,
+    "supports_computer_use": true,
+    "supports_function_calling": true,
+    "supports_pdf_input": true,
+    "supports_prompt_caching": true,
+    "supports_reasoning": true,
+    "supports_response_schema": true,
+    "supports_tool_choice": true,
+    "supports_vision": true,
+    "supports_xhigh_reasoning_effort": true,
+    "supports_native_structured_output": true,
+    "supports_max_reasoning_effort": true,
+    "supports_output_config": true,
+    "bedrock_output_config_effort_ceiling": "xhigh"
   },
   "anthropic.claude-sonnet-4-20250514-v1:0": {
     "cache_creation_input_token_cost": 0.00000375,
@@ -596,8 +623,7 @@
     "supports_reasoning": true,
     "supports_response_schema": true,
     "supports_tool_choice": true,
-    "supports_vision": true,
-    "tool_use_system_prompt_tokens": 159
+    "supports_vision": true
   },
   "anthropic.claude-sonnet-4-5-20250929-v1:0": {
     "cache_creation_input_token_cost": 0.00000375,
@@ -629,7 +655,6 @@
     "supports_response_schema": true,
     "supports_tool_choice": true,
     "supports_vision": true,
-    "tool_use_system_prompt_tokens": 159,
     "supports_native_structured_output": true
   },
   "anthropic.claude-sonnet-4-6": {
@@ -658,9 +683,8 @@
     "supports_max_reasoning_effort": true,
     "supports_tool_choice": true,
     "supports_vision": true,
-    "tool_use_system_prompt_tokens": 346,
     "supports_native_structured_output": true,
-    "supports_minimal_reasoning_effort": true
+    "supports_output_config": true
   },
   "anthropic.claude-v1": {
     "input_cost_per_token": 0.000008,
@@ -962,7 +986,6 @@
     "supports_response_schema": true,
     "supports_tool_choice": true,
     "supports_vision": true,
-    "tool_use_system_prompt_tokens": 346,
     "supports_native_structured_output": true
   },
   "apac.anthropic.claude-sonnet-4-20250514-v1:0": {
@@ -992,8 +1015,7 @@
     "supports_reasoning": true,
     "supports_response_schema": true,
     "supports_tool_choice": true,
-    "supports_vision": true,
-    "tool_use_system_prompt_tokens": 159
+    "supports_vision": true
   },
   "au.anthropic.claude-haiku-4-5-20251001-v1:0": {
     "cache_creation_input_token_cost": 0.000001375,
@@ -1014,7 +1036,6 @@
     "supports_response_schema": true,
     "supports_tool_choice": true,
     "supports_vision": true,
-    "tool_use_system_prompt_tokens": 346,
     "supports_native_structured_output": true
   },
   "au.anthropic.claude-opus-4-6-v1": {
@@ -1041,10 +1062,10 @@
     "supports_response_schema": true,
     "supports_tool_choice": true,
     "supports_vision": true,
-    "tool_use_system_prompt_tokens": 346,
     "supports_native_structured_output": true,
+    "supports_output_config": true,
     "supports_max_reasoning_effort": true,
-    "supports_minimal_reasoning_effort": true
+    "bedrock_output_config_effort_ceiling": "max"
   },
   "au.anthropic.claude-opus-4-7": {
     "cache_creation_input_token_cost": 0.000006875,
@@ -1071,10 +1092,41 @@
     "supports_tool_choice": true,
     "supports_vision": true,
     "supports_xhigh_reasoning_effort": true,
-    "tool_use_system_prompt_tokens": 346,
     "supports_native_structured_output": true,
     "supports_max_reasoning_effort": true,
-    "supports_minimal_reasoning_effort": true
+    "supports_output_config": true,
+    "bedrock_output_config_effort_ceiling": "xhigh"
+  },
+  "au.anthropic.claude-opus-4-8": {
+    "cache_creation_input_token_cost": 0.000006875,
+    "cache_creation_input_token_cost_above_1hr": 0.000011,
+    "cache_read_input_token_cost": 5.5e-7,
+    "input_cost_per_token": 0.0000055,
+    "litellm_provider": "bedrock_converse",
+    "max_input_tokens": 1000000,
+    "max_output_tokens": 128000,
+    "max_tokens": 128000,
+    "mode": "chat",
+    "output_cost_per_token": 0.0000275,
+    "search_context_cost_per_query": {
+      "search_context_size_high": 0.01,
+      "search_context_size_low": 0.01,
+      "search_context_size_medium": 0.01
+    },
+    "supports_assistant_prefill": false,
+    "supports_computer_use": true,
+    "supports_function_calling": true,
+    "supports_pdf_input": true,
+    "supports_prompt_caching": true,
+    "supports_reasoning": true,
+    "supports_response_schema": true,
+    "supports_tool_choice": true,
+    "supports_vision": true,
+    "supports_xhigh_reasoning_effort": true,
+    "supports_native_structured_output": true,
+    "supports_max_reasoning_effort": true,
+    "supports_output_config": true,
+    "bedrock_output_config_effort_ceiling": "xhigh"
   },
   "au.anthropic.claude-sonnet-4-5-20250929-v1:0": {
     "cache_creation_input_token_cost": 0.000004125,
@@ -1104,7 +1156,6 @@
     "supports_response_schema": true,
     "supports_tool_choice": true,
     "supports_vision": true,
-    "tool_use_system_prompt_tokens": 346,
     "supports_native_structured_output": true
   },
   "au.anthropic.claude-sonnet-4-6": {
@@ -1132,9 +1183,8 @@
     "supports_max_reasoning_effort": true,
     "supports_tool_choice": true,
     "supports_vision": true,
-    "tool_use_system_prompt_tokens": 346,
     "supports_native_structured_output": true,
-    "supports_minimal_reasoning_effort": true
+    "supports_output_config": true
   },
   "azure/command-r-plus": {
     "input_cost_per_token": 0.000003,
@@ -4540,10 +4590,10 @@
     "supports_pdf_input": true,
     "supports_prompt_caching": true,
     "supports_reasoning": true,
-    "supports_minimal_reasoning_effort": true,
     "supports_response_schema": true,
     "supports_tool_choice": true,
-    "supports_vision": true
+    "supports_vision": true,
+    "supports_output_config": true
   },
   "azure_ai/claude-opus-4-6": {
     "input_cost_per_token": 0.000005,
@@ -4570,9 +4620,8 @@
     "supports_response_schema": true,
     "supports_tool_choice": true,
     "supports_vision": true,
-    "tool_use_system_prompt_tokens": 159,
-    "supports_max_reasoning_effort": true,
-    "supports_minimal_reasoning_effort": true
+    "supports_output_config": true,
+    "supports_max_reasoning_effort": true
   },
   "azure_ai/claude-opus-4-7": {
     "input_cost_per_token": 0.000005,
@@ -4600,9 +4649,35 @@
     "supports_tool_choice": true,
     "supports_vision": true,
     "supports_xhigh_reasoning_effort": true,
-    "tool_use_system_prompt_tokens": 159,
-    "supports_max_reasoning_effort": true,
-    "supports_minimal_reasoning_effort": true
+    "supports_max_reasoning_effort": true
+  },
+  "azure_ai/claude-opus-4-8": {
+    "input_cost_per_token": 0.000005,
+    "output_cost_per_token": 0.000025,
+    "litellm_provider": "azure_ai",
+    "max_input_tokens": 200000,
+    "max_output_tokens": 128000,
+    "max_tokens": 128000,
+    "mode": "chat",
+    "search_context_cost_per_query": {
+      "search_context_size_high": 0.01,
+      "search_context_size_low": 0.01,
+      "search_context_size_medium": 0.01
+    },
+    "cache_creation_input_token_cost": 0.00000625,
+    "cache_creation_input_token_cost_above_1hr": 0.00001,
+    "cache_read_input_token_cost": 5e-7,
+    "supports_assistant_prefill": false,
+    "supports_computer_use": true,
+    "supports_function_calling": true,
+    "supports_pdf_input": true,
+    "supports_prompt_caching": true,
+    "supports_reasoning": true,
+    "supports_response_schema": true,
+    "supports_tool_choice": true,
+    "supports_vision": true,
+    "supports_xhigh_reasoning_effort": true,
+    "supports_max_reasoning_effort": true
   },
   "azure_ai/claude-sonnet-4-5": {
     "cache_creation_input_token_cost": 0.00000375,
@@ -4646,8 +4721,7 @@
     "supports_max_reasoning_effort": true,
     "supports_tool_choice": true,
     "supports_vision": true,
-    "tool_use_system_prompt_tokens": 346,
-    "supports_minimal_reasoning_effort": true
+    "supports_output_config": true
   },
   "azure_ai/deepseek-r1": {
     "input_cost_per_token": 0.00000135,
@@ -4743,57 +4817,339 @@
     "supports_tool_choice": true,
     "supports_web_search": true
   },
-  "azure_ai/gpt-oss-120b": {
-    "input_cost_per_token": 1.5e-7,
-    "output_cost_per_token": 6e-7,
+  "azure_ai/gpt-5.4": {
+    "cache_read_input_token_cost": 2.5e-7,
+    "cache_read_input_token_cost_above_272k_tokens": 5e-7,
+    "cache_read_input_token_cost_priority": 5e-7,
+    "cache_read_input_token_cost_above_272k_tokens_priority": 0.000001,
+    "input_cost_per_token": 0.0000025,
+    "input_cost_per_token_above_272k_tokens": 0.000005,
+    "input_cost_per_token_priority": 0.000005,
+    "input_cost_per_token_above_272k_tokens_priority": 0.00001,
     "litellm_provider": "azure_ai",
-    "max_input_tokens": 131072,
-    "max_output_tokens": 131072,
-    "max_tokens": 131072,
+    "max_input_tokens": 1050000,
+    "max_output_tokens": 128000,
+    "max_tokens": 128000,
     "mode": "chat",
-    "source": "https://azure.microsoft.com/en-us/pricing/details/cognitive-services/openai-service/",
+    "output_cost_per_token": 0.000015,
+    "output_cost_per_token_above_272k_tokens": 0.0000225,
+    "output_cost_per_token_priority": 0.00003,
+    "output_cost_per_token_above_272k_tokens_priority": 0.000045,
+    "source": "https://ai.azure.com/catalog/models/gpt-5.4",
+    "supported_endpoints": [
+      "/v1/chat/completions",
+      "/v1/batch",
+      "/v1/responses"
+    ],
+    "supported_modalities": [
+      "text",
+      "image"
+    ],
+    "supported_output_modalities": [
+      "text"
+    ],
     "supports_function_calling": true,
+    "supports_native_streaming": true,
     "supports_parallel_function_calling": true,
+    "supports_pdf_input": true,
+    "supports_prompt_caching": true,
+    "supports_reasoning": true,
     "supports_response_schema": true,
-    "supports_tool_choice": true
+    "supports_system_messages": true,
+    "supports_tool_choice": true,
+    "supports_service_tier": true,
+    "supports_vision": true,
+    "supports_web_search": true,
+    "supports_none_reasoning_effort": true,
+    "supports_xhigh_reasoning_effort": true,
+    "supports_minimal_reasoning_effort": true
   },
-  "azure_ai/grok-3": {
-    "input_cost_per_token": 0.000003,
+  "azure_ai/gpt-5.4-2026-03-05": {
+    "cache_read_input_token_cost": 2.5e-7,
+    "cache_read_input_token_cost_above_272k_tokens": 5e-7,
+    "cache_read_input_token_cost_priority": 5e-7,
+    "cache_read_input_token_cost_above_272k_tokens_priority": 0.000001,
+    "input_cost_per_token": 0.0000025,
+    "input_cost_per_token_above_272k_tokens": 0.000005,
+    "input_cost_per_token_priority": 0.000005,
+    "input_cost_per_token_above_272k_tokens_priority": 0.00001,
     "litellm_provider": "azure_ai",
-    "max_input_tokens": 131072,
-    "max_output_tokens": 131072,
-    "max_tokens": 131072,
+    "max_input_tokens": 1050000,
+    "max_output_tokens": 128000,
+    "max_tokens": 128000,
     "mode": "chat",
     "output_cost_per_token": 0.000015,
-    "source": "https://azure.microsoft.com/en-us/pricing/details/ai-foundry-models/grok/",
+    "output_cost_per_token_above_272k_tokens": 0.0000225,
+    "output_cost_per_token_priority": 0.00003,
+    "output_cost_per_token_above_272k_tokens_priority": 0.000045,
+    "source": "https://ai.azure.com/catalog/models/gpt-5.4",
+    "supported_endpoints": [
+      "/v1/chat/completions",
+      "/v1/batch",
+      "/v1/responses"
+    ],
+    "supported_modalities": [
+      "text",
+      "image"
+    ],
+    "supported_output_modalities": [
+      "text"
+    ],
     "supports_function_calling": true,
-    "supports_response_schema": false,
+    "supports_native_streaming": true,
+    "supports_parallel_function_calling": true,
+    "supports_pdf_input": true,
+    "supports_prompt_caching": true,
+    "supports_reasoning": true,
+    "supports_response_schema": true,
+    "supports_system_messages": true,
     "supports_tool_choice": true,
-    "supports_web_search": true
+    "supports_service_tier": true,
+    "supports_vision": true,
+    "supports_web_search": true,
+    "supports_none_reasoning_effort": true,
+    "supports_xhigh_reasoning_effort": true,
+    "supports_minimal_reasoning_effort": true
   },
-  "azure_ai/grok-3-mini": {
-    "input_cost_per_token": 2.5e-7,
+  "azure_ai/gpt-5.4-mini": {
+    "cache_read_input_token_cost": 7.5e-8,
+    "cache_read_input_token_cost_above_272k_tokens": 1.5e-7,
+    "cache_read_input_token_cost_priority": 1.5e-7,
+    "cache_read_input_token_cost_above_272k_tokens_priority": 3e-7,
+    "input_cost_per_token": 7.5e-7,
+    "input_cost_per_token_above_272k_tokens": 0.0000015,
+    "input_cost_per_token_priority": 0.0000015,
+    "input_cost_per_token_above_272k_tokens_priority": 0.000003,
     "litellm_provider": "azure_ai",
-    "max_input_tokens": 131072,
-    "max_output_tokens": 131072,
-    "max_tokens": 131072,
+    "max_input_tokens": 400000,
+    "max_output_tokens": 128000,
+    "max_tokens": 128000,
     "mode": "chat",
-    "output_cost_per_token": 0.00000127,
-    "source": "https://azure.microsoft.com/en-us/pricing/details/ai-foundry-models/grok/",
+    "output_cost_per_token": 0.0000045,
+    "output_cost_per_token_above_272k_tokens": 0.00000675,
+    "output_cost_per_token_priority": 0.000009,
+    "output_cost_per_token_above_272k_tokens_priority": 0.0000135,
+    "source": "https://ai.azure.com/catalog/models/gpt-5.4-mini",
+    "supported_endpoints": [
+      "/v1/chat/completions",
+      "/v1/batch",
+      "/v1/responses"
+    ],
+    "supported_modalities": [
+      "text",
+      "image"
+    ],
+    "supported_output_modalities": [
+      "text"
+    ],
     "supports_function_calling": true,
+    "supports_native_streaming": true,
+    "supports_parallel_function_calling": true,
+    "supports_pdf_input": true,
+    "supports_prompt_caching": true,
     "supports_reasoning": true,
-    "supports_response_schema": false,
+    "supports_response_schema": true,
+    "supports_system_messages": true,
     "supports_tool_choice": true,
-    "supports_web_search": true
+    "supports_service_tier": true,
+    "supports_vision": true,
+    "supports_web_search": true,
+    "supports_none_reasoning_effort": true,
+    "supports_xhigh_reasoning_effort": true,
+    "supports_minimal_reasoning_effort": false
   },
-  "azure_ai/grok-4": {
-    "input_cost_per_token": 0.000003,
+  "azure_ai/gpt-5.4-mini-2026-03-17": {
+    "cache_read_input_token_cost": 7.5e-8,
+    "cache_read_input_token_cost_above_272k_tokens": 1.5e-7,
+    "cache_read_input_token_cost_priority": 1.5e-7,
+    "cache_read_input_token_cost_above_272k_tokens_priority": 3e-7,
+    "input_cost_per_token": 7.5e-7,
+    "input_cost_per_token_above_272k_tokens": 0.0000015,
+    "input_cost_per_token_priority": 0.0000015,
+    "input_cost_per_token_above_272k_tokens_priority": 0.000003,
     "litellm_provider": "azure_ai",
-    "max_input_tokens": 131072,
-    "max_output_tokens": 131072,
-    "max_tokens": 131072,
+    "max_input_tokens": 400000,
+    "max_output_tokens": 128000,
+    "max_tokens": 128000,
     "mode": "chat",
-    "output_cost_per_token": 0.000015,
+    "output_cost_per_token": 0.0000045,
+    "output_cost_per_token_above_272k_tokens": 0.00000675,
+    "output_cost_per_token_priority": 0.000009,
+    "output_cost_per_token_above_272k_tokens_priority": 0.0000135,
+    "source": "https://ai.azure.com/catalog/models/gpt-5.4-mini",
+    "supported_endpoints": [
+      "/v1/chat/completions",
+      "/v1/batch",
+      "/v1/responses"
+    ],
+    "supported_modalities": [
+      "text",
+      "image"
+    ],
+    "supported_output_modalities": [
+      "text"
+    ],
+    "supports_function_calling": true,
+    "supports_native_streaming": true,
+    "supports_parallel_function_calling": true,
+    "supports_pdf_input": true,
+    "supports_prompt_caching": true,
+    "supports_reasoning": true,
+    "supports_response_schema": true,
+    "supports_system_messages": true,
+    "supports_tool_choice": true,
+    "supports_service_tier": true,
+    "supports_vision": true,
+    "supports_web_search": true,
+    "supports_none_reasoning_effort": true,
+    "supports_xhigh_reasoning_effort": true,
+    "supports_minimal_reasoning_effort": false
+  },
+  "azure_ai/gpt-5.4-nano": {
+    "cache_read_input_token_cost": 2e-8,
+    "cache_read_input_token_cost_above_272k_tokens": 4e-8,
+    "cache_read_input_token_cost_priority": 4e-8,
+    "cache_read_input_token_cost_above_272k_tokens_priority": 8e-8,
+    "input_cost_per_token": 2e-7,
+    "input_cost_per_token_above_272k_tokens": 4e-7,
+    "input_cost_per_token_priority": 4e-7,
+    "input_cost_per_token_above_272k_tokens_priority": 8e-7,
+    "litellm_provider": "azure_ai",
+    "max_input_tokens": 400000,
+    "max_output_tokens": 128000,
+    "max_tokens": 128000,
+    "mode": "chat",
+    "output_cost_per_token": 0.00000125,
+    "output_cost_per_token_above_272k_tokens": 0.000001875,
+    "output_cost_per_token_priority": 0.0000025,
+    "output_cost_per_token_above_272k_tokens_priority": 0.00000375,
+    "source": "https://ai.azure.com/catalog/models/gpt-5.4-nano",
+    "supported_endpoints": [
+      "/v1/chat/completions",
+      "/v1/batch",
+      "/v1/responses"
+    ],
+    "supported_modalities": [
+      "text",
+      "image"
+    ],
+    "supported_output_modalities": [
+      "text"
+    ],
+    "supports_function_calling": true,
+    "supports_native_streaming": true,
+    "supports_parallel_function_calling": true,
+    "supports_pdf_input": true,
+    "supports_prompt_caching": true,
+    "supports_reasoning": true,
+    "supports_response_schema": true,
+    "supports_system_messages": true,
+    "supports_tool_choice": true,
+    "supports_service_tier": true,
+    "supports_vision": true,
+    "supports_web_search": true,
+    "supports_none_reasoning_effort": true,
+    "supports_xhigh_reasoning_effort": true,
+    "supports_minimal_reasoning_effort": false
+  },
+  "azure_ai/gpt-5.4-nano-2026-03-17": {
+    "cache_read_input_token_cost": 2e-8,
+    "cache_read_input_token_cost_above_272k_tokens": 4e-8,
+    "cache_read_input_token_cost_priority": 4e-8,
+    "cache_read_input_token_cost_above_272k_tokens_priority": 8e-8,
+    "input_cost_per_token": 2e-7,
+    "input_cost_per_token_above_272k_tokens": 4e-7,
+    "input_cost_per_token_priority": 4e-7,
+    "input_cost_per_token_above_272k_tokens_priority": 8e-7,
+    "litellm_provider": "azure_ai",
+    "max_input_tokens": 400000,
+    "max_output_tokens": 128000,
+    "max_tokens": 128000,
+    "mode": "chat",
+    "output_cost_per_token": 0.00000125,
+    "output_cost_per_token_above_272k_tokens": 0.000001875,
+    "output_cost_per_token_priority": 0.0000025,
+    "output_cost_per_token_above_272k_tokens_priority": 0.00000375,
+    "source": "https://ai.azure.com/catalog/models/gpt-5.4-nano",
+    "supported_endpoints": [
+      "/v1/chat/completions",
+      "/v1/batch",
+      "/v1/responses"
+    ],
+    "supported_modalities": [
+      "text",
+      "image"
+    ],
+    "supported_output_modalities": [
+      "text"
+    ],
+    "supports_function_calling": true,
+    "supports_native_streaming": true,
+    "supports_parallel_function_calling": true,
+    "supports_pdf_input": true,
+    "supports_prompt_caching": true,
+    "supports_reasoning": true,
+    "supports_response_schema": true,
+    "supports_system_messages": true,
+    "supports_tool_choice": true,
+    "supports_service_tier": true,
+    "supports_vision": true,
+    "supports_web_search": true,
+    "supports_none_reasoning_effort": true,
+    "supports_xhigh_reasoning_effort": true,
+    "supports_minimal_reasoning_effort": false
+  },
+  "azure_ai/gpt-oss-120b": {
+    "input_cost_per_token": 1.5e-7,
+    "output_cost_per_token": 6e-7,
+    "litellm_provider": "azure_ai",
+    "max_input_tokens": 131072,
+    "max_output_tokens": 131072,
+    "max_tokens": 131072,
+    "mode": "chat",
+    "source": "https://azure.microsoft.com/en-us/pricing/details/cognitive-services/openai-service/",
+    "supports_function_calling": true,
+    "supports_parallel_function_calling": true,
+    "supports_response_schema": true,
+    "supports_tool_choice": true
+  },
+  "azure_ai/grok-3": {
+    "input_cost_per_token": 0.000003,
+    "litellm_provider": "azure_ai",
+    "max_input_tokens": 131072,
+    "max_output_tokens": 131072,
+    "max_tokens": 131072,
+    "mode": "chat",
+    "output_cost_per_token": 0.000015,
+    "source": "https://azure.microsoft.com/en-us/pricing/details/ai-foundry-models/grok/",
+    "supports_function_calling": true,
+    "supports_response_schema": false,
+    "supports_tool_choice": true,
+    "supports_web_search": true
+  },
+  "azure_ai/grok-3-mini": {
+    "input_cost_per_token": 2.5e-7,
+    "litellm_provider": "azure_ai",
+    "max_input_tokens": 131072,
+    "max_output_tokens": 131072,
+    "max_tokens": 131072,
+    "mode": "chat",
+    "output_cost_per_token": 0.00000127,
+    "source": "https://azure.microsoft.com/en-us/pricing/details/ai-foundry-models/grok/",
+    "supports_function_calling": true,
+    "supports_reasoning": true,
+    "supports_response_schema": false,
+    "supports_tool_choice": true,
+    "supports_web_search": true
+  },
+  "azure_ai/grok-4": {
+    "input_cost_per_token": 0.000003,
+    "litellm_provider": "azure_ai",
+    "max_input_tokens": 131072,
+    "max_output_tokens": 131072,
+    "max_tokens": 131072,
+    "mode": "chat",
+    "output_cost_per_token": 0.000015,
     "source": "https://azure.microsoft.com/en-us/pricing/details/ai-foundry-models/grok/",
     "supports_function_calling": true,
     "supports_response_schema": true,
@@ -6390,7 +6746,6 @@
     "supports_response_schema": true,
     "supports_tool_choice": true,
     "supports_vision": true,
-    "tool_use_system_prompt_tokens": 346,
     "supports_native_structured_output": true,
     "supports_pdf_input": true
   },
@@ -6568,7 +6923,6 @@
     "supports_response_schema": true,
     "supports_tool_choice": true,
     "supports_vision": true,
-    "tool_use_system_prompt_tokens": 346,
     "supports_native_structured_output": true,
     "supports_pdf_input": true
   },
@@ -7131,8 +7485,7 @@
     "supports_response_schema": true,
     "supports_tool_choice": true,
     "supports_vision": true,
-    "supports_web_search": true,
-    "tool_use_system_prompt_tokens": 159
+    "supports_web_search": true
   },
   "claude-3-haiku-20240307": {
     "cache_creation_input_token_cost": 3e-7,
@@ -7150,8 +7503,7 @@
     "supports_prompt_caching": true,
     "supports_response_schema": true,
     "supports_tool_choice": true,
-    "supports_vision": true,
-    "tool_use_system_prompt_tokens": 264
+    "supports_vision": true
   },
   "claude-3-opus-20240229": {
     "cache_creation_input_token_cost": 0.00001875,
@@ -7170,8 +7522,7 @@
     "supports_prompt_caching": true,
     "supports_response_schema": true,
     "supports_tool_choice": true,
-    "supports_vision": true,
-    "tool_use_system_prompt_tokens": 395
+    "supports_vision": true
   },
   "claude-4-opus-20250514": {
     "cache_creation_input_token_cost": 0.00001875,
@@ -7196,8 +7547,7 @@
     "supports_reasoning": true,
     "supports_response_schema": true,
     "supports_tool_choice": true,
-    "supports_vision": true,
-    "tool_use_system_prompt_tokens": 159
+    "supports_vision": true
   },
   "claude-4-sonnet-20250514": {
     "cache_creation_input_token_cost": 0.00000375,
@@ -7227,8 +7577,7 @@
     "supports_response_schema": true,
     "supports_tool_choice": true,
     "supports_vision": true,
-    "supports_web_search": true,
-    "tool_use_system_prompt_tokens": 159
+    "supports_web_search": true
   },
   "claude-haiku-4-5": {
     "cache_creation_input_token_cost": 0.00000125,
@@ -7296,8 +7645,7 @@
     "supports_reasoning": true,
     "supports_response_schema": true,
     "supports_tool_choice": true,
-    "supports_vision": true,
-    "tool_use_system_prompt_tokens": 159
+    "supports_vision": true
   },
   "claude-opus-4-1-20250805": {
     "cache_creation_input_token_cost": 0.00001875,
@@ -7324,8 +7672,7 @@
     "supports_reasoning": true,
     "supports_response_schema": true,
     "supports_tool_choice": true,
-    "supports_vision": true,
-    "tool_use_system_prompt_tokens": 159
+    "supports_vision": true
   },
   "claude-opus-4-20250514": {
     "cache_creation_input_token_cost": 0.00001875,
@@ -7352,8 +7699,7 @@
     "supports_reasoning": true,
     "supports_response_schema": true,
     "supports_tool_choice": true,
-    "supports_vision": true,
-    "tool_use_system_prompt_tokens": 159
+    "supports_vision": true
   },
   "claude-opus-4-5": {
     "cache_creation_input_token_cost": 0.00000625,
@@ -7377,11 +7723,10 @@
     "supports_pdf_input": true,
     "supports_prompt_caching": true,
     "supports_reasoning": true,
-    "supports_minimal_reasoning_effort": true,
     "supports_response_schema": true,
     "supports_tool_choice": true,
     "supports_vision": true,
-    "tool_use_system_prompt_tokens": 159
+    "supports_output_config": true
   },
   "claude-opus-4-5-20251101": {
     "cache_creation_input_token_cost": 0.00000625,
@@ -7405,11 +7750,10 @@
     "supports_pdf_input": true,
     "supports_prompt_caching": true,
     "supports_reasoning": true,
-    "supports_minimal_reasoning_effort": true,
     "supports_response_schema": true,
     "supports_tool_choice": true,
     "supports_vision": true,
-    "tool_use_system_prompt_tokens": 159
+    "supports_output_config": true
   },
   "claude-opus-4-6": {
     "cache_creation_input_token_cost": 0.00000625,
@@ -7437,13 +7781,12 @@
     "supports_response_schema": true,
     "supports_tool_choice": true,
     "supports_vision": true,
-    "tool_use_system_prompt_tokens": 346,
     "provider_specific_entry": {
       "us": 1.1,
       "fast": 6
     },
-    "supports_max_reasoning_effort": true,
-    "supports_minimal_reasoning_effort": true
+    "supports_output_config": true,
+    "supports_max_reasoning_effort": true
   },
   "claude-opus-4-6-20260205": {
     "cache_creation_input_token_cost": 0.00000625,
@@ -7471,13 +7814,12 @@
     "supports_response_schema": true,
     "supports_tool_choice": true,
     "supports_vision": true,
-    "tool_use_system_prompt_tokens": 346,
     "provider_specific_entry": {
       "us": 1.1,
       "fast": 6
     },
     "supports_max_reasoning_effort": true,
-    "supports_minimal_reasoning_effort": true
+    "supports_output_config": true
   },
   "claude-opus-4-7": {
     "cache_creation_input_token_cost": 0.00000625,
@@ -7507,12 +7849,11 @@
     "supports_vision": true,
     "supports_xhigh_reasoning_effort": true,
     "supports_max_reasoning_effort": true,
-    "tool_use_system_prompt_tokens": 346,
     "provider_specific_entry": {
       "us": 1.1,
       "fast": 6
     },
-    "supports_minimal_reasoning_effort": true
+    "supports_output_config": true
   },
   "claude-opus-4-7-20260416": {
     "cache_creation_input_token_cost": 0.00000625,
@@ -7542,12 +7883,45 @@
     "supports_vision": true,
     "supports_xhigh_reasoning_effort": true,
     "supports_max_reasoning_effort": true,
-    "tool_use_system_prompt_tokens": 346,
     "provider_specific_entry": {
       "us": 1.1,
       "fast": 6
     },
-    "supports_minimal_reasoning_effort": true
+    "supports_output_config": true
+  },
+  "claude-opus-4-8": {
+    "cache_creation_input_token_cost": 0.00000625,
+    "cache_creation_input_token_cost_above_1hr": 0.00001,
+    "cache_read_input_token_cost": 5e-7,
+    "input_cost_per_token": 0.000005,
+    "litellm_provider": "anthropic",
+    "max_input_tokens": 1000000,
+    "max_output_tokens": 128000,
+    "max_tokens": 128000,
+    "mode": "chat",
+    "output_cost_per_token": 0.000025,
+    "search_context_cost_per_query": {
+      "search_context_size_high": 0.01,
+      "search_context_size_low": 0.01,
+      "search_context_size_medium": 0.01
+    },
+    "supports_adaptive_thinking": true,
+    "supports_assistant_prefill": false,
+    "supports_computer_use": true,
+    "supports_function_calling": true,
+    "supports_pdf_input": true,
+    "supports_prompt_caching": true,
+    "supports_reasoning": true,
+    "supports_response_schema": true,
+    "supports_tool_choice": true,
+    "supports_vision": true,
+    "supports_xhigh_reasoning_effort": true,
+    "supports_max_reasoning_effort": true,
+    "provider_specific_entry": {
+      "us": 1.1,
+      "fast": 2
+    },
+    "supports_output_config": true
   },
   "claude-sonnet-4-20250514": {
     "deprecation_date": "2026-05-14",
@@ -7578,8 +7952,7 @@
     "supports_reasoning": true,
     "supports_response_schema": true,
     "supports_tool_choice": true,
-    "supports_vision": true,
-    "tool_use_system_prompt_tokens": 159
+    "supports_vision": true
   },
   "claude-sonnet-4-5": {
     "cache_creation_input_token_cost": 0.00000375,
@@ -7608,8 +7981,7 @@
     "supports_reasoning": true,
     "supports_response_schema": true,
     "supports_tool_choice": true,
-    "supports_vision": true,
-    "tool_use_system_prompt_tokens": 346
+    "supports_vision": true
   },
   "claude-sonnet-4-5-20250929": {
     "cache_creation_input_token_cost": 0.00000375,
@@ -7639,8 +8011,7 @@
     "supports_response_schema": true,
     "supports_tool_choice": true,
     "supports_vision": true,
-    "supports_web_search": true,
-    "tool_use_system_prompt_tokens": 346
+    "supports_web_search": true
   },
   "claude-sonnet-4-5-20250929-v1:0": {
     "cache_creation_input_token_cost": 0.00000375,
@@ -7664,8 +8035,7 @@
     "supports_reasoning": true,
     "supports_response_schema": true,
     "supports_tool_choice": true,
-    "supports_vision": true,
-    "tool_use_system_prompt_tokens": 159
+    "supports_vision": true
   },
   "claude-sonnet-4-6": {
     "cache_creation_input_token_cost": 0.00000375,
@@ -7693,8 +8063,7 @@
     "supports_max_reasoning_effort": true,
     "supports_tool_choice": true,
     "supports_vision": true,
-    "tool_use_system_prompt_tokens": 346,
-    "supports_minimal_reasoning_effort": true
+    "supports_output_config": true
   },
   "cloudflare/@cf/meta/llama-2-7b-chat-fp16": {
     "input_cost_per_token": 0.000001923,
@@ -8841,8 +9210,8 @@
     "supports_assistant_prefill": true,
     "supports_function_calling": true,
     "supports_reasoning": true,
-    "supports_minimal_reasoning_effort": true,
-    "supports_tool_choice": true
+    "supports_tool_choice": true,
+    "supports_output_config": true
   },
   "databricks/databricks-claude-sonnet-4": {
     "input_cost_per_token": 0.0000029999900000000002,
@@ -10359,7 +10728,6 @@
     "supports_response_schema": true,
     "supports_tool_choice": true,
     "supports_vision": true,
-    "tool_use_system_prompt_tokens": 346,
     "supports_native_structured_output": true
   },
   "eu.anthropic.claude-opus-4-1-20250805-v1:0": {
@@ -10385,8 +10753,7 @@
     "supports_reasoning": true,
     "supports_response_schema": true,
     "supports_tool_choice": true,
-    "supports_vision": true,
-    "tool_use_system_prompt_tokens": 159
+    "supports_vision": true
   },
   "eu.anthropic.claude-opus-4-20250514-v1:0": {
     "cache_creation_input_token_cost": 0.00001875,
@@ -10411,8 +10778,7 @@
     "supports_reasoning": true,
     "supports_response_schema": true,
     "supports_tool_choice": true,
-    "supports_vision": true,
-    "tool_use_system_prompt_tokens": 159
+    "supports_vision": true
   },
   "eu.anthropic.claude-opus-4-5-20251101-v1:0": {
     "cache_creation_input_token_cost": 0.00000625,
@@ -10432,15 +10798,15 @@
     "supports_assistant_prefill": true,
     "supports_computer_use": true,
     "supports_function_calling": true,
-    "supports_minimal_reasoning_effort": true,
     "supports_pdf_input": true,
     "supports_prompt_caching": true,
     "supports_reasoning": true,
     "supports_response_schema": true,
     "supports_tool_choice": true,
     "supports_vision": true,
-    "tool_use_system_prompt_tokens": 159,
-    "supports_native_structured_output": true
+    "supports_native_structured_output": true,
+    "supports_output_config": true,
+    "bedrock_output_config_effort_ceiling": "high"
   },
   "eu.anthropic.claude-opus-4-6-v1": {
     "cache_creation_input_token_cost": 0.000006875,
@@ -10466,10 +10832,10 @@
     "supports_response_schema": true,
     "supports_tool_choice": true,
     "supports_vision": true,
-    "tool_use_system_prompt_tokens": 346,
     "supports_native_structured_output": true,
+    "supports_output_config": true,
     "supports_max_reasoning_effort": true,
-    "supports_minimal_reasoning_effort": true
+    "bedrock_output_config_effort_ceiling": "max"
   },
   "eu.anthropic.claude-opus-4-7": {
     "cache_creation_input_token_cost": 0.000006875,
@@ -10496,10 +10862,41 @@
     "supports_tool_choice": true,
     "supports_vision": true,
     "supports_xhigh_reasoning_effort": true,
-    "tool_use_system_prompt_tokens": 346,
     "supports_native_structured_output": true,
     "supports_max_reasoning_effort": true,
-    "supports_minimal_reasoning_effort": true
+    "supports_output_config": true,
+    "bedrock_output_config_effort_ceiling": "xhigh"
+  },
+  "eu.anthropic.claude-opus-4-8": {
+    "cache_creation_input_token_cost": 0.000006875,
+    "cache_creation_input_token_cost_above_1hr": 0.000011,
+    "cache_read_input_token_cost": 5.5e-7,
+    "input_cost_per_token": 0.0000055,
+    "litellm_provider": "bedrock_converse",
+    "max_input_tokens": 1000000,
+    "max_output_tokens": 128000,
+    "max_tokens": 128000,
+    "mode": "chat",
+    "output_cost_per_token": 0.0000275,
+    "search_context_cost_per_query": {
+      "search_context_size_high": 0.01,
+      "search_context_size_low": 0.01,
+      "search_context_size_medium": 0.01
+    },
+    "supports_assistant_prefill": false,
+    "supports_computer_use": true,
+    "supports_function_calling": true,
+    "supports_pdf_input": true,
+    "supports_prompt_caching": true,
+    "supports_reasoning": true,
+    "supports_response_schema": true,
+    "supports_tool_choice": true,
+    "supports_vision": true,
+    "supports_xhigh_reasoning_effort": true,
+    "supports_native_structured_output": true,
+    "supports_max_reasoning_effort": true,
+    "supports_output_config": true,
+    "bedrock_output_config_effort_ceiling": "xhigh"
   },
   "eu.anthropic.claude-sonnet-4-20250514-v1:0": {
     "cache_creation_input_token_cost": 0.00000375,
@@ -10528,8 +10925,7 @@
     "supports_reasoning": true,
     "supports_response_schema": true,
     "supports_tool_choice": true,
-    "supports_vision": true,
-    "tool_use_system_prompt_tokens": 159
+    "supports_vision": true
   },
   "eu.anthropic.claude-sonnet-4-5-20250929-v1:0": {
     "cache_creation_input_token_cost": 0.000004125,
@@ -10559,7 +10955,6 @@
     "supports_response_schema": true,
     "supports_tool_choice": true,
     "supports_vision": true,
-    "tool_use_system_prompt_tokens": 346,
     "supports_native_structured_output": true
   },
   "eu.anthropic.claude-sonnet-4-6": {
@@ -10587,9 +10982,8 @@
     "supports_max_reasoning_effort": true,
     "supports_tool_choice": true,
     "supports_vision": true,
-    "tool_use_system_prompt_tokens": 346,
     "supports_native_structured_output": true,
-    "supports_minimal_reasoning_effort": true
+    "supports_output_config": true
   },
   "eu.deepseek.v3.2": {
     "input_cost_per_token": 7.4e-7,
@@ -11381,7 +11775,22 @@
     "supports_response_schema": true,
     "supports_tool_choice": true
   },
-  "fireworks_ai/accounts/fireworks/models/gpt-oss-120b": {
+  "fireworks_ai/accounts/fireworks/models/glm-5p1": {
+    "cache_read_input_token_cost": 2.6e-7,
+    "input_cost_per_token": 0.0000014,
+    "litellm_provider": "fireworks_ai",
+    "max_input_tokens": 202800,
+    "max_output_tokens": 202800,
+    "max_tokens": 202800,
+    "mode": "chat",
+    "output_cost_per_token": 0.0000044,
+    "source": "https://fireworks.ai/models/fireworks/glm-5p1",
+    "supports_function_calling": false,
+    "supports_reasoning": true,
+    "supports_response_schema": false,
+    "supports_tool_choice": false
+  },
+  "fireworks_ai/accounts/fireworks/models/gpt-oss-120b": {
     "input_cost_per_token": 1.5e-7,
     "litellm_provider": "fireworks_ai",
     "max_input_tokens": 131072,
@@ -12982,6 +13391,21 @@
     "supports_response_schema": true,
     "supports_tool_choice": true
   },
+  "fireworks_ai/glm-5p1": {
+    "cache_read_input_token_cost": 2.6e-7,
+    "input_cost_per_token": 0.0000014,
+    "litellm_provider": "fireworks_ai",
+    "max_input_tokens": 202800,
+    "max_output_tokens": 202800,
+    "max_tokens": 202800,
+    "mode": "chat",
+    "output_cost_per_token": 0.0000044,
+    "source": "https://fireworks.ai/models/fireworks/glm-5p1",
+    "supports_function_calling": false,
+    "supports_reasoning": true,
+    "supports_response_schema": false,
+    "supports_tool_choice": false
+  },
   "fireworks_ai/kimi-k2p5": {
     "cache_read_input_token_cost": 1e-7,
     "input_cost_per_token": 6e-7,
@@ -14086,6 +14510,73 @@
     },
     "web_search_billing_unit": "per_query"
   },
+  "gemini-3.1-flash-lite": {
+    "cache_read_input_token_cost": 2.5e-8,
+    "cache_read_input_token_cost_batches": 1.25e-8,
+    "cache_read_input_token_cost_flex": 1.25e-8,
+    "cache_read_input_token_cost_per_audio_token": 5e-8,
+    "cache_read_input_token_cost_priority": 4.5e-8,
+    "input_cost_per_audio_token": 5e-7,
+    "input_cost_per_token": 2.5e-7,
+    "input_cost_per_token_batches": 1.25e-7,
+    "input_cost_per_token_flex": 1.25e-7,
+    "input_cost_per_token_priority": 4.5e-7,
+    "litellm_provider": "vertex_ai-language-models",
+    "max_audio_length_hours": 8.4,
+    "max_audio_per_prompt": 1,
+    "max_images_per_prompt": 3000,
+    "max_input_tokens": 1048576,
+    "max_output_tokens": 65536,
+    "max_pdf_size_mb": 30,
+    "max_tokens": 65536,
+    "max_video_length": 1,
+    "max_videos_per_prompt": 10,
+    "mode": "chat",
+    "output_cost_per_reasoning_token": 0.0000015,
+    "output_cost_per_token": 0.0000015,
+    "output_cost_per_token_batches": 7.5e-7,
+    "output_cost_per_token_flex": 7.5e-7,
+    "output_cost_per_token_priority": 0.0000027,
+    "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-3.1-flash-lite",
+    "supported_endpoints": [
+      "/v1/chat/completions",
+      "/v1/completions",
+      "/v1/batch"
+    ],
+    "supported_modalities": [
+      "text",
+      "image",
+      "audio",
+      "video"
+    ],
+    "supported_output_modalities": [
+      "text"
+    ],
+    "supports_audio_input": true,
+    "supports_audio_output": false,
+    "supports_code_execution": true,
+    "supports_file_search": true,
+    "supports_function_calling": true,
+    "supports_parallel_function_calling": true,
+    "supports_pdf_input": true,
+    "supports_prompt_caching": true,
+    "supports_reasoning": true,
+    "supports_response_schema": true,
+    "supports_system_messages": true,
+    "supports_tool_choice": true,
+    "supports_url_context": true,
+    "supports_video_input": true,
+    "supports_vision": true,
+    "supports_web_search": true,
+    "supports_native_streaming": true,
+    "search_context_cost_per_query": {
+      "search_context_size_low": 0.014,
+      "search_context_size_medium": 0.014,
+      "search_context_size_high": 0.014
+    },
+    "web_search_billing_unit": "per_query",
+    "supports_service_tier": true
+  },
   "gemini-3.1-flash-lite-preview": {
     "cache_read_input_token_cost": 2.5e-8,
     "cache_read_input_token_cost_per_audio_token": 5e-8,
@@ -14297,6 +14788,65 @@
     },
     "web_search_billing_unit": "per_query"
   },
+  "gemini-3.5-flash": {
+    "cache_read_input_token_cost": 1.5e-7,
+    "input_cost_per_audio_token": 0.000001,
+    "input_cost_per_token": 0.0000015,
+    "litellm_provider": "vertex_ai-language-models",
+    "max_audio_length_hours": 8.4,
+    "max_audio_per_prompt": 1,
+    "max_images_per_prompt": 3000,
+    "max_input_tokens": 1048576,
+    "max_output_tokens": 65535,
+    "max_pdf_size_mb": 30,
+    "max_tokens": 65535,
+    "max_video_length": 1,
+    "max_videos_per_prompt": 10,
+    "mode": "chat",
+    "output_cost_per_reasoning_token": 0.000009,
+    "output_cost_per_token": 0.000009,
+    "source": "https://ai.google.dev/pricing/gemini-3",
+    "supported_endpoints": [
+      "/v1/chat/completions",
+      "/v1/completions",
+      "/v1/batch"
+    ],
+    "supported_modalities": [
+      "text",
+      "image",
+      "audio",
+      "video"
+    ],
+    "supported_output_modalities": [
+      "text"
+    ],
+    "supports_audio_output": false,
+    "supports_audio_input": true,
+    "supports_function_calling": true,
+    "supports_parallel_function_calling": true,
+    "supports_pdf_input": true,
+    "supports_prompt_caching": true,
+    "supports_reasoning": true,
+    "supports_response_schema": true,
+    "supports_system_messages": true,
+    "supports_tool_choice": true,
+    "supports_url_context": true,
+    "supports_video_input": true,
+    "supports_vision": true,
+    "supports_web_search": true,
+    "supports_native_streaming": true,
+    "input_cost_per_token_priority": 0.0000027,
+    "input_cost_per_audio_token_priority": 0.0000018,
+    "output_cost_per_token_priority": 0.0000162,
+    "cache_read_input_token_cost_priority": 2.7e-7,
+    "supports_service_tier": true,
+    "search_context_cost_per_query": {
+      "search_context_size_low": 0.014,
+      "search_context_size_medium": 0.014,
+      "search_context_size_high": 0.014
+    },
+    "web_search_billing_unit": "per_query"
+  },
   "gemini-exp-1206": {
     "cache_read_input_token_cost": 3e-8,
     "input_cost_per_audio_token": 0.000001,
@@ -15488,6 +16038,75 @@
     },
     "web_search_billing_unit": "per_query"
   },
+  "gemini/gemini-3.1-flash-lite": {
+    "cache_read_input_token_cost": 2.5e-8,
+    "cache_read_input_token_cost_batches": 1.25e-8,
+    "cache_read_input_token_cost_flex": 1.25e-8,
+    "cache_read_input_token_cost_per_audio_token": 5e-8,
+    "cache_read_input_token_cost_priority": 4.5e-8,
+    "input_cost_per_audio_token": 5e-7,
+    "input_cost_per_token": 2.5e-7,
+    "input_cost_per_token_batches": 1.25e-7,
+    "input_cost_per_token_flex": 1.25e-7,
+    "input_cost_per_token_priority": 4.5e-7,
+    "litellm_provider": "gemini",
+    "max_audio_length_hours": 8.4,
+    "max_audio_per_prompt": 1,
+    "max_images_per_prompt": 3000,
+    "max_input_tokens": 1048576,
+    "max_output_tokens": 65536,
+    "max_pdf_size_mb": 30,
+    "max_tokens": 65536,
+    "max_video_length": 1,
+    "max_videos_per_prompt": 10,
+    "mode": "chat",
+    "output_cost_per_reasoning_token": 0.0000015,
+    "output_cost_per_token": 0.0000015,
+    "output_cost_per_token_batches": 7.5e-7,
+    "output_cost_per_token_flex": 7.5e-7,
+    "output_cost_per_token_priority": 0.0000027,
+    "rpm": 15,
+    "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-3.1-flash-lite",
+    "supported_endpoints": [
+      "/v1/chat/completions",
+      "/v1/completions",
+      "/v1/batch"
+    ],
+    "supported_modalities": [
+      "text",
+      "image",
+      "audio",
+      "video"
+    ],
+    "supported_output_modalities": [
+      "text"
+    ],
+    "supports_audio_input": true,
+    "supports_audio_output": false,
+    "supports_code_execution": true,
+    "supports_file_search": true,
+    "supports_function_calling": true,
+    "supports_parallel_function_calling": true,
+    "supports_pdf_input": true,
+    "supports_prompt_caching": true,
+    "supports_reasoning": true,
+    "supports_response_schema": true,
+    "supports_system_messages": true,
+    "supports_tool_choice": true,
+    "supports_url_context": true,
+    "supports_video_input": true,
+    "supports_vision": true,
+    "supports_web_search": true,
+    "supports_native_streaming": true,
+    "tpm": 250000,
+    "search_context_cost_per_query": {
+      "search_context_size_low": 0.014,
+      "search_context_size_medium": 0.014,
+      "search_context_size_high": 0.014
+    },
+    "web_search_billing_unit": "per_query",
+    "supports_service_tier": true
+  },
   "gemini/gemini-3.1-flash-lite-preview": {
     "cache_read_input_token_cost": 2.5e-8,
     "cache_read_input_token_cost_per_audio_token": 5e-8,
@@ -15710,43 +16329,104 @@
     },
     "web_search_billing_unit": "per_query"
   },
-  "gemini/gemini-exp-1114": {
-    "input_cost_per_token": 0,
-    "input_cost_per_token_above_128k_tokens": 0,
+  "gemini/gemini-3.5-flash": {
+    "cache_read_input_token_cost": 1.5e-7,
+    "input_cost_per_audio_token": 0.000001,
+    "input_cost_per_token": 0.0000015,
     "litellm_provider": "gemini",
     "max_audio_length_hours": 8.4,
     "max_audio_per_prompt": 1,
     "max_images_per_prompt": 3000,
     "max_input_tokens": 1048576,
-    "max_output_tokens": 8192,
+    "max_output_tokens": 65535,
     "max_pdf_size_mb": 30,
-    "max_tokens": 8192,
+    "max_tokens": 65535,
     "max_video_length": 1,
     "max_videos_per_prompt": 10,
-    "metadata": {
-      "notes": "Rate limits not documented for gemini-exp-1114. Assuming same as gemini-1.5-pro.",
-      "supports_tool_choice": true
-    },
     "mode": "chat",
-    "output_cost_per_token": 0,
-    "output_cost_per_token_above_128k_tokens": 0,
-    "rpm": 1000,
-    "source": "https://ai.google.dev/pricing",
+    "output_cost_per_reasoning_token": 0.000009,
+    "output_cost_per_token": 0.000009,
+    "rpm": 2000,
+    "source": "https://ai.google.dev/pricing/gemini-3",
+    "supported_endpoints": [
+      "/v1/chat/completions",
+      "/v1/completions",
+      "/v1/batch"
+    ],
+    "supported_modalities": [
+      "text",
+      "image",
+      "audio",
+      "video"
+    ],
+    "supported_output_modalities": [
+      "text"
+    ],
+    "supports_audio_output": false,
+    "supports_audio_input": true,
     "supports_function_calling": true,
+    "supports_parallel_function_calling": true,
+    "supports_pdf_input": true,
+    "supports_prompt_caching": true,
+    "supports_reasoning": true,
     "supports_response_schema": true,
     "supports_system_messages": true,
     "supports_tool_choice": true,
+    "supports_url_context": true,
+    "supports_video_input": true,
     "supports_vision": true,
-    "tpm": 4000000
-  },
-  "gemini/gemini-exp-1206": {
-    "input_cost_per_token": 0,
-    "input_cost_per_token_above_128k_tokens": 0,
-    "litellm_provider": "gemini",
-    "max_audio_length_hours": 8.4,
-    "max_audio_per_prompt": 1,
-    "max_images_per_prompt": 3000,
-    "max_input_tokens": 2097152,
+    "supports_web_search": true,
+    "supports_native_streaming": true,
+    "tpm": 800000,
+    "input_cost_per_token_priority": 0.0000027,
+    "input_cost_per_audio_token_priority": 0.0000018,
+    "output_cost_per_token_priority": 0.0000162,
+    "cache_read_input_token_cost_priority": 2.7e-7,
+    "supports_service_tier": true,
+    "search_context_cost_per_query": {
+      "search_context_size_low": 0.014,
+      "search_context_size_medium": 0.014,
+      "search_context_size_high": 0.014
+    },
+    "web_search_billing_unit": "per_query"
+  },
+  "gemini/gemini-exp-1114": {
+    "input_cost_per_token": 0,
+    "input_cost_per_token_above_128k_tokens": 0,
+    "litellm_provider": "gemini",
+    "max_audio_length_hours": 8.4,
+    "max_audio_per_prompt": 1,
+    "max_images_per_prompt": 3000,
+    "max_input_tokens": 1048576,
+    "max_output_tokens": 8192,
+    "max_pdf_size_mb": 30,
+    "max_tokens": 8192,
+    "max_video_length": 1,
+    "max_videos_per_prompt": 10,
+    "metadata": {
+      "notes": "Rate limits not documented for gemini-exp-1114. Assuming same as gemini-1.5-pro.",
+      "supports_tool_choice": true
+    },
+    "mode": "chat",
+    "output_cost_per_token": 0,
+    "output_cost_per_token_above_128k_tokens": 0,
+    "rpm": 1000,
+    "source": "https://ai.google.dev/pricing",
+    "supports_function_calling": true,
+    "supports_response_schema": true,
+    "supports_system_messages": true,
+    "supports_tool_choice": true,
+    "supports_vision": true,
+    "tpm": 4000000
+  },
+  "gemini/gemini-exp-1206": {
+    "input_cost_per_token": 0,
+    "input_cost_per_token_above_128k_tokens": 0,
+    "litellm_provider": "gemini",
+    "max_audio_length_hours": 8.4,
+    "max_audio_per_prompt": 1,
+    "max_images_per_prompt": 3000,
+    "max_input_tokens": 2097152,
     "max_output_tokens": 8192,
     "max_pdf_size_mb": 30,
     "max_tokens": 8192,
@@ -16160,7 +16840,7 @@
     "supports_function_calling": true,
     "supports_parallel_function_calling": true,
     "supports_vision": true,
-    "supports_minimal_reasoning_effort": true
+    "supports_output_config": true
   },
   "github_copilot/claude-opus-4.6-fast": {
     "litellm_provider": "github_copilot",
@@ -16460,7 +17140,6 @@
     "supports_response_schema": true,
     "supports_tool_choice": true,
     "supports_vision": true,
-    "tool_use_system_prompt_tokens": 346,
     "supports_native_structured_output": true
   },
   "global.anthropic.claude-opus-4-5-20251101-v1:0": {
@@ -16482,15 +17161,15 @@
     "supports_assistant_prefill": true,
     "supports_computer_use": true,
     "supports_function_calling": true,
-    "supports_minimal_reasoning_effort": true,
     "supports_pdf_input": true,
     "supports_prompt_caching": true,
     "supports_reasoning": true,
     "supports_response_schema": true,
     "supports_tool_choice": true,
     "supports_vision": true,
-    "tool_use_system_prompt_tokens": 159,
-    "supports_native_structured_output": true
+    "supports_native_structured_output": true,
+    "supports_output_config": true,
+    "bedrock_output_config_effort_ceiling": "high"
   },
   "global.anthropic.claude-opus-4-6-v1": {
     "cache_creation_input_token_cost": 0.00000625,
@@ -16517,10 +17196,10 @@
     "supports_response_schema": true,
     "supports_tool_choice": true,
     "supports_vision": true,
-    "tool_use_system_prompt_tokens": 346,
     "supports_native_structured_output": true,
+    "supports_output_config": true,
     "supports_max_reasoning_effort": true,
-    "supports_minimal_reasoning_effort": true
+    "bedrock_output_config_effort_ceiling": "max"
   },
   "global.anthropic.claude-opus-4-7": {
     "cache_creation_input_token_cost": 0.00000625,
@@ -16548,10 +17227,41 @@
     "supports_tool_choice": true,
     "supports_vision": true,
     "supports_xhigh_reasoning_effort": true,
-    "tool_use_system_prompt_tokens": 346,
     "supports_native_structured_output": true,
     "supports_max_reasoning_effort": true,
-    "supports_minimal_reasoning_effort": true
+    "supports_output_config": true,
+    "bedrock_output_config_effort_ceiling": "xhigh"
+  },
+  "global.anthropic.claude-opus-4-8": {
+    "cache_creation_input_token_cost": 0.00000625,
+    "cache_creation_input_token_cost_above_1hr": 0.00001,
+    "cache_read_input_token_cost": 5e-7,
+    "input_cost_per_token": 0.000005,
+    "litellm_provider": "bedrock_converse",
+    "max_input_tokens": 1000000,
+    "max_output_tokens": 128000,
+    "max_tokens": 128000,
+    "mode": "chat",
+    "output_cost_per_token": 0.000025,
+    "search_context_cost_per_query": {
+      "search_context_size_high": 0.01,
+      "search_context_size_low": 0.01,
+      "search_context_size_medium": 0.01
+    },
+    "supports_assistant_prefill": false,
+    "supports_computer_use": true,
+    "supports_function_calling": true,
+    "supports_pdf_input": true,
+    "supports_prompt_caching": true,
+    "supports_reasoning": true,
+    "supports_response_schema": true,
+    "supports_tool_choice": true,
+    "supports_vision": true,
+    "supports_xhigh_reasoning_effort": true,
+    "supports_native_structured_output": true,
+    "supports_max_reasoning_effort": true,
+    "supports_output_config": true,
+    "bedrock_output_config_effort_ceiling": "xhigh"
   },
   "global.anthropic.claude-sonnet-4-20250514-v1:0": {
     "cache_creation_input_token_cost": 0.00000375,
@@ -16580,8 +17290,7 @@
     "supports_reasoning": true,
     "supports_response_schema": true,
     "supports_tool_choice": true,
-    "supports_vision": true,
-    "tool_use_system_prompt_tokens": 159
+    "supports_vision": true
   },
   "global.anthropic.claude-sonnet-4-5-20250929-v1:0": {
     "cache_creation_input_token_cost": 0.00000375,
@@ -16613,7 +17322,6 @@
     "supports_response_schema": true,
     "supports_tool_choice": true,
     "supports_vision": true,
-    "tool_use_system_prompt_tokens": 346,
     "supports_native_structured_output": true
   },
   "global.anthropic.claude-sonnet-4-6": {
@@ -16642,9 +17350,8 @@
     "supports_max_reasoning_effort": true,
     "supports_tool_choice": true,
     "supports_vision": true,
-    "tool_use_system_prompt_tokens": 346,
     "supports_native_structured_output": true,
-    "supports_minimal_reasoning_effort": true
+    "supports_output_config": true
   },
   "gmi/MiniMaxAI/MiniMax-M2.1": {
     "input_cost_per_token": 3e-7,
@@ -16686,7 +17393,7 @@
     "output_cost_per_token": 0.000025,
     "supports_function_calling": true,
     "supports_vision": true,
-    "supports_minimal_reasoning_effort": true
+    "supports_output_config": true
   },
   "gmi/anthropic/claude-sonnet-4": {
     "input_cost_per_token": 0.000003,
@@ -17039,6 +17746,8 @@
     "output_cost_per_token": 0.000008,
     "output_cost_per_token_batches": 0.000004,
     "output_cost_per_token_priority": 0.000014,
+    "regional_processing_uplift_multiplier_eu": 1.1,
+    "regional_processing_uplift_multiplier_us": 1.1,
     "supported_endpoints": [
       "/v1/chat/completions",
       "/v1/batch",
@@ -17112,6 +17821,8 @@
     "output_cost_per_token": 0.0000016,
     "output_cost_per_token_batches": 8e-7,
     "output_cost_per_token_priority": 0.0000028,
+    "regional_processing_uplift_multiplier_eu": 1.1,
+    "regional_processing_uplift_multiplier_us": 1.1,
     "supported_endpoints": [
       "/v1/chat/completions",
       "/v1/batch",
@@ -17185,6 +17896,8 @@
     "output_cost_per_token": 4e-7,
     "output_cost_per_token_batches": 2e-7,
     "output_cost_per_token_priority": 8e-7,
+    "regional_processing_uplift_multiplier_eu": 1.1,
+    "regional_processing_uplift_multiplier_us": 1.1,
     "supported_endpoints": [
       "/v1/chat/completions",
       "/v1/batch",
@@ -17256,6 +17969,8 @@
     "output_cost_per_token": 0.00001,
     "output_cost_per_token_batches": 0.000005,
     "output_cost_per_token_priority": 0.000017,
+    "regional_processing_uplift_multiplier_eu": 1.1,
+    "regional_processing_uplift_multiplier_us": 1.1,
     "supports_function_calling": true,
     "supports_parallel_function_calling": true,
     "supports_pdf_input": true,
@@ -17297,6 +18012,8 @@
     "mode": "chat",
     "output_cost_per_token": 0.00001,
     "output_cost_per_token_batches": 0.000005,
+    "regional_processing_uplift_multiplier_eu": 1.1,
+    "regional_processing_uplift_multiplier_us": 1.1,
     "supports_function_calling": true,
     "supports_parallel_function_calling": true,
     "supports_pdf_input": true,
@@ -17318,6 +18035,8 @@
     "mode": "chat",
     "output_cost_per_token": 0.00001,
     "output_cost_per_token_batches": 0.000005,
+    "regional_processing_uplift_multiplier_eu": 1.1,
+    "regional_processing_uplift_multiplier_us": 1.1,
     "supports_function_calling": true,
     "supports_parallel_function_calling": true,
     "supports_pdf_input": true,
@@ -17393,6 +18112,8 @@
     "output_cost_per_token": 6e-7,
     "output_cost_per_token_batches": 3e-7,
     "output_cost_per_token_priority": 0.000001,
+    "regional_processing_uplift_multiplier_eu": 1.1,
+    "regional_processing_uplift_multiplier_us": 1.1,
     "supports_function_calling": true,
     "supports_parallel_function_calling": true,
     "supports_pdf_input": true,
@@ -17662,6 +18383,8 @@
     "output_cost_per_token": 0.00001,
     "output_cost_per_token_flex": 0.000005,
     "output_cost_per_token_priority": 0.00002,
+    "regional_processing_uplift_multiplier_eu": 1.1,
+    "regional_processing_uplift_multiplier_us": 1.1,
     "supported_endpoints": [
       "/v1/chat/completions",
       "/v1/batch",
@@ -17818,6 +18541,8 @@
     "output_cost_per_token": 0.000002,
     "output_cost_per_token_flex": 0.000001,
     "output_cost_per_token_priority": 0.0000036,
+    "regional_processing_uplift_multiplier_eu": 1.1,
+    "regional_processing_uplift_multiplier_us": 1.1,
     "supported_endpoints": [
       "/v1/chat/completions",
       "/v1/batch",
@@ -17899,6 +18624,8 @@
     "max_input_tokens": 272000,
     "max_output_tokens": 128000,
     "max_tokens": 128000,
+    "regional_processing_uplift_multiplier_eu": 1.1,
+    "regional_processing_uplift_multiplier_us": 1.1,
     "mode": "chat",
     "output_cost_per_token": 4e-7,
     "output_cost_per_token_flex": 2e-7,
@@ -18930,6 +19657,38 @@
     "supports_system_messages": true,
     "supports_tool_choice": true
   },
+  "gpt-realtime-2": {
+    "cache_creation_input_audio_token_cost": 4e-7,
+    "cache_read_input_token_cost": 4e-7,
+    "input_cost_per_audio_token": 0.000032,
+    "input_cost_per_image": 0.000005,
+    "input_cost_per_token": 0.000004,
+    "litellm_provider": "openai",
+    "max_input_tokens": 32000,
+    "max_output_tokens": 4096,
+    "max_tokens": 4096,
+    "mode": "chat",
+    "output_cost_per_audio_token": 0.000064,
+    "output_cost_per_token": 0.000016,
+    "supported_endpoints": [
+      "/v1/realtime"
+    ],
+    "supported_modalities": [
+      "text",
+      "image",
+      "audio"
+    ],
+    "supported_output_modalities": [
+      "text",
+      "audio"
+    ],
+    "supports_audio_input": true,
+    "supports_audio_output": true,
+    "supports_function_calling": true,
+    "supports_parallel_function_calling": true,
+    "supports_system_messages": true,
+    "supports_tool_choice": true
+  },
   "gpt-realtime-2025-08-28": {
     "cache_creation_input_audio_token_cost": 4e-7,
     "cache_read_input_token_cost": 4e-7,
@@ -19763,7 +20522,6 @@
     "supports_response_schema": true,
     "supports_tool_choice": true,
     "supports_vision": true,
-    "tool_use_system_prompt_tokens": 346,
     "supports_native_structured_output": true
   },
   "jp.anthropic.claude-sonnet-4-5-20250929-v1:0": {
@@ -19794,9 +20552,36 @@
     "supports_response_schema": true,
     "supports_tool_choice": true,
     "supports_vision": true,
-    "tool_use_system_prompt_tokens": 346,
     "supports_native_structured_output": true
   },
+  "jp.anthropic.claude-sonnet-4-6": {
+    "cache_creation_input_token_cost": 0.000004125,
+    "cache_read_input_token_cost": 3.3e-7,
+    "input_cost_per_token": 0.0000033,
+    "litellm_provider": "bedrock_converse",
+    "max_input_tokens": 1000000,
+    "max_output_tokens": 64000,
+    "max_tokens": 64000,
+    "mode": "chat",
+    "output_cost_per_token": 0.0000165,
+    "search_context_cost_per_query": {
+      "search_context_size_high": 0.01,
+      "search_context_size_low": 0.01,
+      "search_context_size_medium": 0.01
+    },
+    "supports_assistant_prefill": true,
+    "supports_computer_use": true,
+    "supports_function_calling": true,
+    "supports_pdf_input": true,
+    "supports_prompt_caching": true,
+    "supports_reasoning": true,
+    "supports_response_schema": true,
+    "supports_max_reasoning_effort": true,
+    "supports_tool_choice": true,
+    "supports_vision": true,
+    "supports_native_structured_output": true,
+    "supports_output_config": true
+  },
   "kimi-k2-thinking-251104": {
     "input_cost_per_token": 0,
     "litellm_provider": "volcengine",
@@ -21123,6 +21908,21 @@
     "supports_tool_choice": true,
     "supports_vision": true
   },
+  "mistral/ministral-8b-2512": {
+    "input_cost_per_token": 1.5e-7,
+    "litellm_provider": "mistral",
+    "max_input_tokens": 262144,
+    "max_output_tokens": 262144,
+    "max_tokens": 262144,
+    "mode": "chat",
+    "output_cost_per_token": 1.5e-7,
+    "source": "https://mistral.ai/pricing",
+    "supports_assistant_prefill": true,
+    "supports_function_calling": true,
+    "supports_response_schema": true,
+    "supports_tool_choice": true,
+    "supports_vision": true
+  },
   "mistral/mistral-large-2402": {
     "input_cost_per_token": 0.000004,
     "litellm_provider": "mistral",
@@ -23690,7 +24490,21 @@
     "output_cost_per_token": 0.00000156,
     "source": "https://www.oracle.com/cloud/ai/generative-ai/pricing/",
     "supports_function_calling": true,
-    "supports_response_schema": false
+    "supports_response_schema": false,
+    "supports_native_streaming": true
+  },
+  "oci/cohere.command-a-reasoning": {
+    "input_cost_per_token": 0.00000156,
+    "litellm_provider": "oci",
+    "max_input_tokens": 256000,
+    "max_output_tokens": 8192,
+    "max_tokens": 8192,
+    "mode": "chat",
+    "output_cost_per_token": 0.00000156,
+    "source": "https://www.oracle.com/cloud/ai/generative-ai/pricing/",
+    "supports_function_calling": false,
+    "supports_response_schema": false,
+    "supports_native_streaming": true
   },
   "oci/cohere.command-a-reasoning-08-2025": {
     "input_cost_per_token": 0.00000156,
@@ -23716,6 +24530,20 @@
     "supports_function_calling": false,
     "supports_response_schema": false
   },
+  "oci/cohere.command-a-vision": {
+    "input_cost_per_token": 0.00000156,
+    "litellm_provider": "oci",
+    "max_input_tokens": 256000,
+    "max_output_tokens": 8192,
+    "max_tokens": 8192,
+    "mode": "chat",
+    "output_cost_per_token": 0.00000156,
+    "source": "https://www.oracle.com/cloud/ai/generative-ai/pricing/",
+    "supports_function_calling": true,
+    "supports_response_schema": false,
+    "supports_native_streaming": true,
+    "supports_vision": true
+  },
   "oci/cohere.command-a-vision-07-2025": {
     "input_cost_per_token": 0.00000156,
     "litellm_provider": "oci",
@@ -23739,7 +24567,8 @@
     "output_cost_per_token": 0.00000156,
     "source": "https://www.oracle.com/cloud/ai/generative-ai/pricing/",
     "supports_function_calling": true,
-    "supports_response_schema": false
+    "supports_response_schema": false,
+    "supports_native_streaming": true
   },
   "oci/cohere.command-plus-latest": {
     "input_cost_per_token": 0.00000156,
@@ -23751,7 +24580,8 @@
     "output_cost_per_token": 0.00000156,
     "source": "https://www.oracle.com/cloud/ai/generative-ai/pricing/",
     "supports_function_calling": true,
-    "supports_response_schema": false
+    "supports_response_schema": false,
+    "supports_native_streaming": true
   },
   "oci/cohere.command-r-08-2024": {
     "input_cost_per_token": 1.5e-7,
@@ -23788,7 +24618,8 @@
     "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing",
     "supports_function_calling": true,
     "supports_response_schema": true,
-    "supports_vision": true
+    "supports_vision": true,
+    "supports_native_streaming": true
   },
   "oci/google.gemini-2.5-flash-lite": {
     "input_cost_per_token": 7.5e-8,
@@ -23801,7 +24632,8 @@
     "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing",
     "supports_function_calling": true,
     "supports_response_schema": true,
-    "supports_vision": true
+    "supports_vision": true,
+    "supports_native_streaming": true
   },
   "oci/google.gemini-2.5-pro": {
     "input_cost_per_token": 0.00000125,
@@ -23814,7 +24646,8 @@
     "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing",
     "supports_function_calling": true,
     "supports_response_schema": true,
-    "supports_vision": true
+    "supports_vision": true,
+    "supports_native_streaming": true
   },
   "oci/meta.llama-3.1-405b-instruct": {
     "input_cost_per_token": 0.00001068,
@@ -23826,7 +24659,8 @@
     "output_cost_per_token": 0.00001068,
     "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing",
     "supports_function_calling": true,
-    "supports_response_schema": false
+    "supports_response_schema": false,
+    "supports_native_streaming": true
   },
   "oci/meta.llama-3.1-70b-instruct": {
     "input_cost_per_token": 7.2e-7,
@@ -23838,7 +24672,21 @@
     "output_cost_per_token": 7.2e-7,
     "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing",
     "supports_function_calling": true,
-    "supports_response_schema": false
+    "supports_response_schema": false,
+    "supports_native_streaming": true
+  },
+  "oci/meta.llama-3.1-8b-instruct": {
+    "input_cost_per_token": 7.2e-7,
+    "litellm_provider": "oci",
+    "max_input_tokens": 128000,
+    "max_output_tokens": 4000,
+    "max_tokens": 4000,
+    "mode": "chat",
+    "output_cost_per_token": 7.2e-7,
+    "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing",
+    "supports_function_calling": true,
+    "supports_response_schema": false,
+    "supports_native_streaming": true
   },
   "oci/meta.llama-3.2-11b-vision-instruct": {
     "input_cost_per_token": 0.000002,
@@ -23864,6 +24712,7 @@
     "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing",
     "supports_function_calling": true,
     "supports_response_schema": false,
+    "supports_native_streaming": true,
     "supports_vision": true
   },
   "oci/meta.llama-3.3-70b-instruct": {
@@ -23876,7 +24725,8 @@
     "output_cost_per_token": 7.2e-7,
     "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing",
     "supports_function_calling": true,
-    "supports_response_schema": false
+    "supports_response_schema": false,
+    "supports_native_streaming": true
   },
   "oci/meta.llama-3.3-70b-instruct-fp8-dynamic": {
     "input_cost_per_token": 7.2e-7,
@@ -23893,26 +24743,74 @@
   "oci/meta.llama-4-maverick-17b-128e-instruct-fp8": {
     "input_cost_per_token": 7.2e-7,
     "litellm_provider": "oci",
-    "max_input_tokens": 512000,
-    "max_output_tokens": 4000,
-    "max_tokens": 4000,
+    "max_input_tokens": 1048576,
+    "max_output_tokens": 8192,
+    "max_tokens": 8192,
     "mode": "chat",
     "output_cost_per_token": 7.2e-7,
     "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing",
     "supports_function_calling": true,
-    "supports_response_schema": false
+    "supports_response_schema": false,
+    "supports_native_streaming": true,
+    "supports_vision": true
   },
   "oci/meta.llama-4-scout-17b-16e-instruct": {
     "input_cost_per_token": 7.2e-7,
     "litellm_provider": "oci",
-    "max_input_tokens": 192000,
-    "max_output_tokens": 4000,
-    "max_tokens": 4000,
+    "max_input_tokens": 10485760,
+    "max_output_tokens": 8192,
+    "max_tokens": 8192,
     "mode": "chat",
     "output_cost_per_token": 7.2e-7,
     "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing",
     "supports_function_calling": true,
-    "supports_response_schema": false
+    "supports_response_schema": false,
+    "supports_native_streaming": true
+  },
+  "oci/openai.gpt-5": {
+    "input_cost_per_token": 0.00000125,
+    "litellm_provider": "oci",
+    "max_input_tokens": 272000,
+    "max_output_tokens": 128000,
+    "max_tokens": 128000,
+    "mode": "chat",
+    "output_cost_per_token": 0.00001,
+    "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing",
+    "supports_function_calling": true,
+    "supports_native_streaming": true,
+    "supports_reasoning": true,
+    "supports_response_schema": true,
+    "supports_vision": true
+  },
+  "oci/openai.gpt-5-mini": {
+    "input_cost_per_token": 2.5e-7,
+    "litellm_provider": "oci",
+    "max_input_tokens": 272000,
+    "max_output_tokens": 128000,
+    "max_tokens": 128000,
+    "mode": "chat",
+    "output_cost_per_token": 0.000002,
+    "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing",
+    "supports_function_calling": true,
+    "supports_native_streaming": true,
+    "supports_reasoning": true,
+    "supports_response_schema": true,
+    "supports_vision": true
+  },
+  "oci/openai.gpt-5-nano": {
+    "input_cost_per_token": 5e-8,
+    "litellm_provider": "oci",
+    "max_input_tokens": 272000,
+    "max_output_tokens": 128000,
+    "max_tokens": 128000,
+    "mode": "chat",
+    "output_cost_per_token": 4e-7,
+    "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing",
+    "supports_function_calling": true,
+    "supports_native_streaming": true,
+    "supports_reasoning": true,
+    "supports_response_schema": true,
+    "supports_vision": true
   },
   "oci/xai.grok-3": {
     "input_cost_per_token": 0.000003,
@@ -23924,7 +24822,8 @@
     "output_cost_per_token": 0.000015,
     "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing",
     "supports_function_calling": true,
-    "supports_response_schema": false
+    "supports_response_schema": false,
+    "supports_native_streaming": true
   },
   "oci/xai.grok-3-fast": {
     "input_cost_per_token": 0.000005,
@@ -23936,7 +24835,8 @@
     "output_cost_per_token": 0.000025,
     "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing",
     "supports_function_calling": true,
-    "supports_response_schema": false
+    "supports_response_schema": false,
+    "supports_native_streaming": true
   },
   "oci/xai.grok-3-mini": {
     "input_cost_per_token": 3e-7,
@@ -23948,7 +24848,8 @@
     "output_cost_per_token": 5e-7,
     "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing",
     "supports_function_calling": true,
-    "supports_response_schema": false
+    "supports_response_schema": false,
+    "supports_native_streaming": true
   },
   "oci/xai.grok-3-mini-fast": {
     "input_cost_per_token": 6e-7,
@@ -23960,7 +24861,8 @@
     "output_cost_per_token": 0.000004,
     "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing",
     "supports_function_calling": true,
-    "supports_response_schema": false
+    "supports_response_schema": false,
+    "supports_native_streaming": true
   },
   "oci/xai.grok-4": {
     "input_cost_per_token": 0.000003,
@@ -23972,7 +24874,8 @@
     "output_cost_per_token": 0.000015,
     "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing",
     "supports_function_calling": true,
-    "supports_response_schema": false
+    "supports_response_schema": false,
+    "supports_native_streaming": true
   },
   "oci/xai.grok-4-fast": {
     "input_cost_per_token": 0.000005,
@@ -24313,8 +25216,7 @@
     "supports_computer_use": true,
     "supports_function_calling": true,
     "supports_tool_choice": true,
-    "supports_vision": true,
-    "tool_use_system_prompt_tokens": 159
+    "supports_vision": true
   },
   "openrouter/anthropic/claude-3.7-sonnet": {
     "input_cost_per_image": 0.0048,
@@ -24330,8 +25232,7 @@
     "supports_function_calling": true,
     "supports_reasoning": true,
     "supports_tool_choice": true,
-    "supports_vision": true,
-    "tool_use_system_prompt_tokens": 159
+    "supports_vision": true
   },
   "openrouter/anthropic/claude-haiku-4.5": {
     "cache_creation_input_token_cost": 0.00000125,
@@ -24349,8 +25250,7 @@
     "supports_prompt_caching": true,
     "supports_reasoning": true,
     "supports_tool_choice": true,
-    "supports_vision": true,
-    "tool_use_system_prompt_tokens": 346
+    "supports_vision": true
   },
   "openrouter/anthropic/claude-opus-4": {
     "input_cost_per_image": 0.0048,
@@ -24369,8 +25269,7 @@
     "supports_prompt_caching": true,
     "supports_reasoning": true,
     "supports_tool_choice": true,
-    "supports_vision": true,
-    "tool_use_system_prompt_tokens": 159
+    "supports_vision": true
   },
   "openrouter/anthropic/claude-opus-4.1": {
     "input_cost_per_image": 0.0048,
@@ -24390,8 +25289,7 @@
     "supports_prompt_caching": true,
     "supports_reasoning": true,
     "supports_tool_choice": true,
-    "supports_vision": true,
-    "tool_use_system_prompt_tokens": 159
+    "supports_vision": true
   },
   "openrouter/anthropic/claude-opus-4.5": {
     "cache_creation_input_token_cost": 0.00000625,
@@ -24406,12 +25304,11 @@
     "supports_assistant_prefill": true,
     "supports_computer_use": true,
     "supports_function_calling": true,
-    "supports_minimal_reasoning_effort": true,
     "supports_prompt_caching": true,
     "supports_reasoning": true,
     "supports_tool_choice": true,
     "supports_vision": true,
-    "tool_use_system_prompt_tokens": 159
+    "supports_output_config": true
   },
   "openrouter/anthropic/claude-opus-4.6": {
     "cache_creation_input_token_cost": 0.00000625,
@@ -24430,9 +25327,7 @@
     "supports_reasoning": true,
     "supports_max_reasoning_effort": true,
     "supports_tool_choice": true,
-    "supports_vision": true,
-    "tool_use_system_prompt_tokens": 346,
-    "supports_minimal_reasoning_effort": true
+    "supports_vision": true
   },
   "openrouter/anthropic/claude-opus-4.7": {
     "cache_creation_input_token_cost": 0.00000625,
@@ -24454,8 +25349,7 @@
     "supports_max_reasoning_effort": true,
     "supports_tool_choice": true,
     "supports_vision": true,
-    "supports_xhigh_reasoning_effort": true,
-    "tool_use_system_prompt_tokens": 346
+    "supports_xhigh_reasoning_effort": true
   },
   "openrouter/anthropic/claude-sonnet-4": {
     "input_cost_per_image": 0.0048,
@@ -24478,8 +25372,7 @@
     "supports_prompt_caching": true,
     "supports_reasoning": true,
     "supports_tool_choice": true,
-    "supports_vision": true,
-    "tool_use_system_prompt_tokens": 159
+    "supports_vision": true
   },
   "openrouter/anthropic/claude-sonnet-4.5": {
     "input_cost_per_image": 0.0048,
@@ -24502,8 +25395,7 @@
     "supports_prompt_caching": true,
     "supports_reasoning": true,
     "supports_tool_choice": true,
-    "supports_vision": true,
-    "tool_use_system_prompt_tokens": 159
+    "supports_vision": true
   },
   "openrouter/anthropic/claude-sonnet-4.6": {
     "cache_creation_input_token_cost": 0.00000375,
@@ -24527,9 +25419,7 @@
     "supports_reasoning": true,
     "supports_max_reasoning_effort": true,
     "supports_tool_choice": true,
-    "supports_vision": true,
-    "tool_use_system_prompt_tokens": 159,
-    "supports_minimal_reasoning_effort": true
+    "supports_vision": true
   },
   "openrouter/bytedance/ui-tars-1.5-7b": {
     "input_cost_per_token": 1e-7,
@@ -24800,6 +25690,58 @@
     "supports_vision": true,
     "supports_web_search": true
   },
+  "openrouter/google/gemini-3.1-flash-lite": {
+    "cache_read_input_token_cost": 2.5e-8,
+    "cache_read_input_token_cost_per_audio_token": 5e-8,
+    "input_cost_per_audio_token": 5e-7,
+    "input_cost_per_token": 2.5e-7,
+    "litellm_provider": "openrouter",
+    "max_audio_length_hours": 8.4,
+    "max_audio_per_prompt": 1,
+    "max_images_per_prompt": 3000,
+    "max_input_tokens": 1048576,
+    "max_output_tokens": 65536,
+    "max_pdf_size_mb": 30,
+    "max_tokens": 65536,
+    "max_video_length": 1,
+    "max_videos_per_prompt": 10,
+    "mode": "chat",
+    "output_cost_per_reasoning_token": 0.0000015,
+    "output_cost_per_token": 0.0000015,
+    "rpm": 2000,
+    "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-3.1-flash-lite",
+    "supported_endpoints": [
+      "/v1/chat/completions",
+      "/v1/completions",
+      "/v1/batch"
+    ],
+    "supported_modalities": [
+      "text",
+      "image",
+      "audio",
+      "video"
+    ],
+    "supported_output_modalities": [
+      "text"
+    ],
+    "supports_audio_input": true,
+    "supports_audio_output": false,
+    "supports_code_execution": true,
+    "supports_file_search": true,
+    "supports_function_calling": true,
+    "supports_parallel_function_calling": true,
+    "supports_pdf_input": true,
+    "supports_prompt_caching": true,
+    "supports_reasoning": true,
+    "supports_response_schema": true,
+    "supports_system_messages": true,
+    "supports_tool_choice": true,
+    "supports_url_context": true,
+    "supports_video_input": true,
+    "supports_vision": true,
+    "supports_web_search": true,
+    "tpm": 800000
+  },
   "openrouter/google/gemini-3.1-flash-lite-preview": {
     "cache_read_input_token_cost": 2.5e-8,
     "cache_read_input_token_cost_per_audio_token": 5e-8,
@@ -25653,6 +26595,20 @@
     "supports_tool_choice": true,
     "supports_vision": true
   },
+  "openrouter/qwen/qwen3.6-plus": {
+    "input_cost_per_token": 3.25e-7,
+    "litellm_provider": "openrouter",
+    "max_input_tokens": 1000000,
+    "max_output_tokens": 65536,
+    "max_tokens": 65536,
+    "mode": "chat",
+    "output_cost_per_token": 0.00000195,
+    "source": "https://openrouter.ai/qwen/qwen3.6-plus",
+    "supports_function_calling": true,
+    "supports_reasoning": true,
+    "supports_tool_choice": true,
+    "supports_vision": true
+  },
   "openrouter/switchpoint/router": {
     "input_cost_per_token": 8.5e-7,
     "litellm_provider": "openrouter",
@@ -25689,10 +26645,10 @@
     "supports_web_search": true
   },
   "openrouter/xiaomi/mimo-v2-flash": {
-    "input_cost_per_token": 9e-8,
-    "output_cost_per_token": 2.9e-7,
+    "input_cost_per_token": 1e-7,
+    "output_cost_per_token": 3e-7,
     "cache_creation_input_token_cost": 0,
-    "cache_read_input_token_cost": 0,
+    "cache_read_input_token_cost": 1e-8,
     "litellm_provider": "openrouter",
     "max_input_tokens": 262144,
     "max_output_tokens": 16384,
@@ -25702,7 +26658,43 @@
     "supports_tool_choice": true,
     "supports_reasoning": true,
     "supports_vision": false,
-    "supports_prompt_caching": false
+    "supports_prompt_caching": true
+  },
+  "openrouter/xiaomi/mimo-v2.5": {
+    "input_cost_per_token": 4e-7,
+    "output_cost_per_token": 0.000002,
+    "cache_creation_input_token_cost": 0,
+    "cache_read_input_token_cost": 8e-8,
+    "litellm_provider": "openrouter",
+    "max_input_tokens": 1048576,
+    "max_output_tokens": 131072,
+    "max_tokens": 131072,
+    "mode": "chat",
+    "supports_function_calling": true,
+    "supports_tool_choice": true,
+    "supports_reasoning": true,
+    "supports_vision": true,
+    "supports_audio_input": true,
+    "supports_video_input": true,
+    "supports_response_schema": true,
+    "supports_prompt_caching": true
+  },
+  "openrouter/xiaomi/mimo-v2.5-pro": {
+    "input_cost_per_token": 0.000001,
+    "output_cost_per_token": 0.000003,
+    "cache_creation_input_token_cost": 0,
+    "cache_read_input_token_cost": 2e-7,
+    "litellm_provider": "openrouter",
+    "max_input_tokens": 1048576,
+    "max_output_tokens": 16384,
+    "max_tokens": 16384,
+    "mode": "chat",
+    "supports_function_calling": true,
+    "supports_tool_choice": true,
+    "supports_reasoning": true,
+    "supports_vision": false,
+    "supports_response_schema": true,
+    "supports_prompt_caching": true
   },
   "openrouter/z-ai/glm-4.6": {
     "input_cost_per_token": 4e-7,
@@ -27014,6 +28006,19 @@
     "output_cost_per_token": 3e-7,
     "source": "https://cloud.sambanova.ai/plans/pricing"
   },
+  "sambanova/MiniMax-M2.7": {
+    "input_cost_per_token": 3e-7,
+    "litellm_provider": "sambanova",
+    "max_input_tokens": 204800,
+    "max_output_tokens": 131072,
+    "max_tokens": 131072,
+    "mode": "chat",
+    "output_cost_per_token": 0.0000012,
+    "source": "https://cloud.sambanova.ai/plans/pricing",
+    "supports_function_calling": true,
+    "supports_reasoning": true,
+    "supports_tool_choice": true
+  },
   "sambanova/QwQ-32B": {
     "input_cost_per_token": 5e-7,
     "litellm_provider": "sambanova",
@@ -27675,7 +28680,6 @@
     "supports_response_schema": true,
     "supports_tool_choice": true,
     "supports_vision": true,
-    "tool_use_system_prompt_tokens": 346,
     "supports_native_structured_output": true
   },
   "us.amazon.nova-2-lite-v1:0": {
@@ -27908,7 +28912,6 @@
     "supports_response_schema": true,
     "supports_tool_choice": true,
     "supports_vision": true,
-    "tool_use_system_prompt_tokens": 346,
     "supports_native_structured_output": true
   },
   "us.anthropic.claude-opus-4-1-20250805-v1:0": {
@@ -27934,8 +28937,7 @@
     "supports_reasoning": true,
     "supports_response_schema": true,
     "supports_tool_choice": true,
-    "supports_vision": true,
-    "tool_use_system_prompt_tokens": 159
+    "supports_vision": true
   },
   "us.anthropic.claude-opus-4-20250514-v1:0": {
     "cache_creation_input_token_cost": 0.00001875,
@@ -27946,7 +28948,33 @@
     "max_output_tokens": 32000,
     "max_tokens": 32000,
     "mode": "chat",
-    "output_cost_per_token": 0.000075,
+    "output_cost_per_token": 0.000075,
+    "search_context_cost_per_query": {
+      "search_context_size_high": 0.01,
+      "search_context_size_low": 0.01,
+      "search_context_size_medium": 0.01
+    },
+    "supports_assistant_prefill": true,
+    "supports_computer_use": true,
+    "supports_function_calling": true,
+    "supports_pdf_input": true,
+    "supports_prompt_caching": true,
+    "supports_reasoning": true,
+    "supports_response_schema": true,
+    "supports_tool_choice": true,
+    "supports_vision": true
+  },
+  "us.anthropic.claude-opus-4-5-20251101-v1:0": {
+    "cache_creation_input_token_cost": 0.000006875,
+    "cache_creation_input_token_cost_above_1hr": 0.000011,
+    "cache_read_input_token_cost": 5.5e-7,
+    "input_cost_per_token": 0.0000055,
+    "litellm_provider": "bedrock_converse",
+    "max_input_tokens": 200000,
+    "max_output_tokens": 64000,
+    "max_tokens": 64000,
+    "mode": "chat",
+    "output_cost_per_token": 0.0000275,
     "search_context_cost_per_query": {
       "search_context_size_high": 0.01,
       "search_context_size_low": 0.01,
@@ -27961,17 +28989,19 @@
     "supports_response_schema": true,
     "supports_tool_choice": true,
     "supports_vision": true,
-    "tool_use_system_prompt_tokens": 159
+    "supports_native_structured_output": true,
+    "supports_output_config": true,
+    "bedrock_output_config_effort_ceiling": "high"
   },
-  "us.anthropic.claude-opus-4-5-20251101-v1:0": {
+  "us.anthropic.claude-opus-4-6-v1": {
     "cache_creation_input_token_cost": 0.000006875,
     "cache_creation_input_token_cost_above_1hr": 0.000011,
     "cache_read_input_token_cost": 5.5e-7,
     "input_cost_per_token": 0.0000055,
     "litellm_provider": "bedrock_converse",
-    "max_input_tokens": 200000,
-    "max_output_tokens": 64000,
-    "max_tokens": 64000,
+    "max_input_tokens": 1000000,
+    "max_output_tokens": 128000,
+    "max_tokens": 128000,
     "mode": "chat",
     "output_cost_per_token": 0.0000275,
     "search_context_cost_per_query": {
@@ -27979,20 +29009,21 @@
       "search_context_size_low": 0.01,
       "search_context_size_medium": 0.01
     },
-    "supports_assistant_prefill": true,
+    "supports_assistant_prefill": false,
     "supports_computer_use": true,
     "supports_function_calling": true,
-    "supports_minimal_reasoning_effort": true,
     "supports_pdf_input": true,
     "supports_prompt_caching": true,
     "supports_reasoning": true,
     "supports_response_schema": true,
     "supports_tool_choice": true,
     "supports_vision": true,
-    "tool_use_system_prompt_tokens": 159,
-    "supports_native_structured_output": true
+    "supports_native_structured_output": true,
+    "supports_output_config": true,
+    "supports_max_reasoning_effort": true,
+    "bedrock_output_config_effort_ceiling": "max"
   },
-  "us.anthropic.claude-opus-4-6-v1": {
+  "us.anthropic.claude-opus-4-7": {
     "cache_creation_input_token_cost": 0.000006875,
     "cache_creation_input_token_cost_above_1hr": 0.000011,
     "cache_read_input_token_cost": 5.5e-7,
@@ -28017,12 +29048,13 @@
     "supports_response_schema": true,
     "supports_tool_choice": true,
     "supports_vision": true,
-    "tool_use_system_prompt_tokens": 346,
+    "supports_xhigh_reasoning_effort": true,
     "supports_native_structured_output": true,
     "supports_max_reasoning_effort": true,
-    "supports_minimal_reasoning_effort": true
+    "supports_output_config": true,
+    "bedrock_output_config_effort_ceiling": "xhigh"
   },
-  "us.anthropic.claude-opus-4-7": {
+  "us.anthropic.claude-opus-4-8": {
     "cache_creation_input_token_cost": 0.000006875,
     "cache_creation_input_token_cost_above_1hr": 0.000011,
     "cache_read_input_token_cost": 5.5e-7,
@@ -28048,10 +29080,10 @@
     "supports_tool_choice": true,
     "supports_vision": true,
     "supports_xhigh_reasoning_effort": true,
-    "tool_use_system_prompt_tokens": 346,
     "supports_native_structured_output": true,
     "supports_max_reasoning_effort": true,
-    "supports_minimal_reasoning_effort": true
+    "supports_output_config": true,
+    "bedrock_output_config_effort_ceiling": "xhigh"
   },
   "us.anthropic.claude-sonnet-4-20250514-v1:0": {
     "cache_creation_input_token_cost": 0.00000375,
@@ -28080,8 +29112,7 @@
     "supports_reasoning": true,
     "supports_response_schema": true,
     "supports_tool_choice": true,
-    "supports_vision": true,
-    "tool_use_system_prompt_tokens": 159
+    "supports_vision": true
   },
   "us.anthropic.claude-sonnet-4-5-20250929-v1:0": {
     "cache_creation_input_token_cost": 0.000004125,
@@ -28113,7 +29144,6 @@
     "supports_response_schema": true,
     "supports_tool_choice": true,
     "supports_vision": true,
-    "tool_use_system_prompt_tokens": 346,
     "supports_native_structured_output": true
   },
   "us.anthropic.claude-sonnet-4-6": {
@@ -28142,9 +29172,8 @@
     "supports_max_reasoning_effort": true,
     "supports_tool_choice": true,
     "supports_vision": true,
-    "tool_use_system_prompt_tokens": 346,
     "supports_native_structured_output": true,
-    "supports_minimal_reasoning_effort": true
+    "supports_output_config": true
   },
   "us.deepseek.r1-v1:0": {
     "input_cost_per_token": 0.00000135,
@@ -28704,13 +29733,13 @@
     "output_cost_per_token": 0.000025,
     "supports_assistant_prefill": true,
     "supports_computer_use": true,
-    "supports_minimal_reasoning_effort": true,
     "supports_function_calling": true,
     "supports_prompt_caching": true,
     "supports_reasoning": true,
     "supports_response_schema": true,
     "supports_tool_choice": true,
-    "supports_vision": true
+    "supports_vision": true,
+    "supports_output_config": true
   },
   "vercel_ai_gateway/anthropic/claude-opus-4.6": {
     "cache_creation_input_token_cost": 0.00000625,
@@ -28730,7 +29759,7 @@
     "supports_response_schema": true,
     "supports_tool_choice": true,
     "supports_vision": true,
-    "supports_minimal_reasoning_effort": true
+    "supports_output_config": true
   },
   "vercel_ai_gateway/anthropic/claude-sonnet-4": {
     "cache_creation_input_token_cost": 0.00000375,
@@ -29633,8 +30662,7 @@
     "supports_reasoning": true,
     "supports_response_schema": true,
     "supports_tool_choice": true,
-    "supports_vision": true,
-    "tool_use_system_prompt_tokens": 159
+    "supports_vision": true
   },
   "vertex_ai/claude-3-haiku": {
     "input_cost_per_token": 2.5e-7,
@@ -29779,8 +30807,7 @@
     "supports_reasoning": true,
     "supports_response_schema": true,
     "supports_tool_choice": true,
-    "supports_vision": true,
-    "tool_use_system_prompt_tokens": 159
+    "supports_vision": true
   },
   "vertex_ai/claude-opus-4-1": {
     "cache_creation_input_token_cost": 0.00001875,
@@ -29834,14 +30861,13 @@
     "supports_assistant_prefill": true,
     "supports_computer_use": true,
     "supports_function_calling": true,
-    "supports_minimal_reasoning_effort": true,
     "supports_pdf_input": true,
     "supports_prompt_caching": true,
     "supports_reasoning": true,
     "supports_response_schema": true,
     "supports_tool_choice": true,
     "supports_vision": true,
-    "tool_use_system_prompt_tokens": 159
+    "supports_output_config": true
   },
   "vertex_ai/claude-opus-4-5@20251101": {
     "cache_creation_input_token_cost": 0.00000625,
@@ -29861,15 +30887,14 @@
     "supports_assistant_prefill": true,
     "supports_computer_use": true,
     "supports_function_calling": true,
-    "supports_minimal_reasoning_effort": true,
     "supports_pdf_input": true,
     "supports_prompt_caching": true,
     "supports_reasoning": true,
     "supports_response_schema": true,
     "supports_tool_choice": true,
     "supports_vision": true,
-    "tool_use_system_prompt_tokens": 159,
-    "supports_native_streaming": true
+    "supports_native_streaming": true,
+    "supports_output_config": true
   },
   "vertex_ai/claude-opus-4-6": {
     "cache_creation_input_token_cost": 0.00000625,
@@ -29895,9 +30920,8 @@
     "supports_response_schema": true,
     "supports_tool_choice": true,
     "supports_vision": true,
-    "tool_use_system_prompt_tokens": 346,
-    "supports_max_reasoning_effort": true,
-    "supports_minimal_reasoning_effort": true
+    "supports_output_config": true,
+    "supports_max_reasoning_effort": true
   },
   "vertex_ai/claude-opus-4-6@default": {
     "cache_creation_input_token_cost": 0.00000625,
@@ -29923,9 +30947,8 @@
     "supports_response_schema": true,
     "supports_tool_choice": true,
     "supports_vision": true,
-    "tool_use_system_prompt_tokens": 346,
-    "supports_max_reasoning_effort": true,
-    "supports_minimal_reasoning_effort": true
+    "supports_output_config": true,
+    "supports_max_reasoning_effort": true
   },
   "vertex_ai/claude-opus-4-7": {
     "cache_creation_input_token_cost": 0.00000625,
@@ -29952,9 +30975,7 @@
     "supports_tool_choice": true,
     "supports_vision": true,
     "supports_xhigh_reasoning_effort": true,
-    "tool_use_system_prompt_tokens": 346,
-    "supports_max_reasoning_effort": true,
-    "supports_minimal_reasoning_effort": true
+    "supports_max_reasoning_effort": true
   },
   "vertex_ai/claude-opus-4-7@default": {
     "cache_creation_input_token_cost": 0.00000625,
@@ -29981,9 +31002,63 @@
     "supports_tool_choice": true,
     "supports_vision": true,
     "supports_xhigh_reasoning_effort": true,
-    "tool_use_system_prompt_tokens": 346,
-    "supports_max_reasoning_effort": true,
-    "supports_minimal_reasoning_effort": true
+    "supports_max_reasoning_effort": true
+  },
+  "vertex_ai/claude-opus-4-8": {
+    "cache_creation_input_token_cost": 0.00000625,
+    "cache_creation_input_token_cost_above_1hr": 0.00001,
+    "cache_read_input_token_cost": 5e-7,
+    "input_cost_per_token": 0.000005,
+    "litellm_provider": "vertex_ai-anthropic_models",
+    "max_input_tokens": 1000000,
+    "max_output_tokens": 128000,
+    "max_tokens": 128000,
+    "mode": "chat",
+    "output_cost_per_token": 0.000025,
+    "search_context_cost_per_query": {
+      "search_context_size_high": 0.01,
+      "search_context_size_low": 0.01,
+      "search_context_size_medium": 0.01
+    },
+    "supports_assistant_prefill": false,
+    "supports_computer_use": true,
+    "supports_function_calling": true,
+    "supports_pdf_input": true,
+    "supports_prompt_caching": true,
+    "supports_reasoning": true,
+    "supports_response_schema": true,
+    "supports_tool_choice": true,
+    "supports_vision": true,
+    "supports_xhigh_reasoning_effort": true,
+    "supports_max_reasoning_effort": true
+  },
+  "vertex_ai/claude-opus-4-8@default": {
+    "cache_creation_input_token_cost": 0.00000625,
+    "cache_creation_input_token_cost_above_1hr": 0.00001,
+    "cache_read_input_token_cost": 5e-7,
+    "input_cost_per_token": 0.000005,
+    "litellm_provider": "vertex_ai-anthropic_models",
+    "max_input_tokens": 1000000,
+    "max_output_tokens": 128000,
+    "max_tokens": 128000,
+    "mode": "chat",
+    "output_cost_per_token": 0.000025,
+    "search_context_cost_per_query": {
+      "search_context_size_high": 0.01,
+      "search_context_size_low": 0.01,
+      "search_context_size_medium": 0.01
+    },
+    "supports_assistant_prefill": false,
+    "supports_computer_use": true,
+    "supports_function_calling": true,
+    "supports_pdf_input": true,
+    "supports_prompt_caching": true,
+    "supports_reasoning": true,
+    "supports_response_schema": true,
+    "supports_tool_choice": true,
+    "supports_vision": true,
+    "supports_xhigh_reasoning_effort": true,
+    "supports_max_reasoning_effort": true
   },
   "vertex_ai/claude-opus-4@20250514": {
     "cache_creation_input_token_cost": 0.00001875,
@@ -30008,8 +31083,7 @@
     "supports_reasoning": true,
     "supports_response_schema": true,
     "supports_tool_choice": true,
-    "supports_vision": true,
-    "tool_use_system_prompt_tokens": 159
+    "supports_vision": true
   },
   "vertex_ai/claude-sonnet-4": {
     "cache_creation_input_token_cost": 0.00000375,
@@ -30038,8 +31112,7 @@
     "supports_reasoning": true,
     "supports_response_schema": true,
     "supports_tool_choice": true,
-    "supports_vision": true,
-    "tool_use_system_prompt_tokens": 159
+    "supports_vision": true
   },
   "vertex_ai/claude-sonnet-4-5": {
     "cache_creation_input_token_cost": 0.00000375,
@@ -30114,13 +31187,12 @@
     "supports_max_reasoning_effort": true,
     "supports_tool_choice": true,
     "supports_vision": true,
-    "tool_use_system_prompt_tokens": 346,
     "search_context_cost_per_query": {
       "search_context_size_high": 0.01,
       "search_context_size_low": 0.01,
       "search_context_size_medium": 0.01
     },
-    "supports_minimal_reasoning_effort": true
+    "supports_output_config": true
   },
   "vertex_ai/claude-sonnet-4-6@default": {
     "cache_creation_input_token_cost": 0.00000375,
@@ -30142,13 +31214,12 @@
     "supports_max_reasoning_effort": true,
     "supports_tool_choice": true,
     "supports_vision": true,
-    "tool_use_system_prompt_tokens": 346,
     "search_context_cost_per_query": {
       "search_context_size_high": 0.01,
       "search_context_size_low": 0.01,
       "search_context_size_medium": 0.01
     },
-    "supports_minimal_reasoning_effort": true
+    "supports_output_config": true
   },
   "vertex_ai/claude-sonnet-4@20250514": {
     "cache_creation_input_token_cost": 0.00000375,
@@ -30177,8 +31248,7 @@
     "supports_reasoning": true,
     "supports_response_schema": true,
     "supports_tool_choice": true,
-    "supports_vision": true,
-    "tool_use_system_prompt_tokens": 159
+    "supports_vision": true
   },
   "vertex_ai/codestral-2": {
     "input_cost_per_token": 3e-7,
@@ -30457,6 +31527,73 @@
     },
     "web_search_billing_unit": "per_query"
   },
+  "vertex_ai/gemini-3.1-flash-lite": {
+    "cache_read_input_token_cost": 2.5e-8,
+    "cache_read_input_token_cost_batches": 1.25e-8,
+    "cache_read_input_token_cost_flex": 1.25e-8,
+    "cache_read_input_token_cost_per_audio_token": 5e-8,
+    "cache_read_input_token_cost_priority": 4.5e-8,
+    "input_cost_per_audio_token": 5e-7,
+    "input_cost_per_token": 2.5e-7,
+    "input_cost_per_token_batches": 1.25e-7,
+    "input_cost_per_token_flex": 1.25e-7,
+    "input_cost_per_token_priority": 4.5e-7,
+    "litellm_provider": "vertex_ai-language-models",
+    "max_audio_length_hours": 8.4,
+    "max_audio_per_prompt": 1,
+    "max_images_per_prompt": 3000,
+    "max_input_tokens": 1048576,
+    "max_output_tokens": 65536,
+    "max_pdf_size_mb": 30,
+    "max_tokens": 65536,
+    "max_video_length": 1,
+    "max_videos_per_prompt": 10,
+    "mode": "chat",
+    "output_cost_per_reasoning_token": 0.0000015,
+    "output_cost_per_token": 0.0000015,
+    "output_cost_per_token_batches": 7.5e-7,
+    "output_cost_per_token_flex": 7.5e-7,
+    "output_cost_per_token_priority": 0.0000027,
+    "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#gemini-models",
+    "supported_endpoints": [
+      "/v1/chat/completions",
+      "/v1/completions",
+      "/v1/batch"
+    ],
+    "supported_modalities": [
+      "text",
+      "image",
+      "audio",
+      "video"
+    ],
+    "supported_output_modalities": [
+      "text"
+    ],
+    "supports_audio_input": true,
+    "supports_audio_output": false,
+    "supports_code_execution": true,
+    "supports_file_search": true,
+    "supports_function_calling": true,
+    "supports_parallel_function_calling": true,
+    "supports_pdf_input": true,
+    "supports_prompt_caching": true,
+    "supports_reasoning": true,
+    "supports_response_schema": true,
+    "supports_system_messages": true,
+    "supports_tool_choice": true,
+    "supports_url_context": true,
+    "supports_video_input": true,
+    "supports_vision": true,
+    "supports_web_search": true,
+    "supports_native_streaming": true,
+    "search_context_cost_per_query": {
+      "search_context_size_low": 0.014,
+      "search_context_size_medium": 0.014,
+      "search_context_size_high": 0.014
+    },
+    "web_search_billing_unit": "per_query",
+    "supports_service_tier": true
+  },
   "vertex_ai/gemini-3.1-flash-lite-preview": {
     "cache_read_input_token_cost": 2.5e-8,
     "cache_read_input_token_cost_per_audio_token": 5e-8,
@@ -30642,6 +31779,64 @@
     },
     "web_search_billing_unit": "per_query"
   },
+  "vertex_ai/gemini-3.5-flash": {
+    "cache_read_input_token_cost": 1.5e-7,
+    "input_cost_per_token": 0.0000015,
+    "input_cost_per_audio_token": 0.000001,
+    "litellm_provider": "vertex_ai",
+    "max_audio_length_hours": 8.4,
+    "max_audio_per_prompt": 1,
+    "max_images_per_prompt": 3000,
+    "max_input_tokens": 1048576,
+    "max_output_tokens": 65535,
+    "max_pdf_size_mb": 30,
+    "max_tokens": 65535,
+    "max_video_length": 1,
+    "max_videos_per_prompt": 10,
+    "mode": "chat",
+    "output_cost_per_reasoning_token": 0.000009,
+    "output_cost_per_token": 0.000009,
+    "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing",
+    "supported_endpoints": [
+      "/v1/chat/completions",
+      "/v1/completions",
+      "/v1/batch"
+    ],
+    "supported_modalities": [
+      "text",
+      "image",
+      "audio",
+      "video"
+    ],
+    "supported_output_modalities": [
+      "text"
+    ],
+    "supports_audio_input": true,
+    "supports_function_calling": true,
+    "supports_parallel_function_calling": true,
+    "supports_pdf_input": true,
+    "supports_prompt_caching": true,
+    "supports_reasoning": true,
+    "supports_response_schema": true,
+    "supports_system_messages": true,
+    "supports_tool_choice": true,
+    "supports_url_context": true,
+    "supports_video_input": true,
+    "supports_vision": true,
+    "supports_web_search": true,
+    "supports_native_streaming": true,
+    "input_cost_per_token_priority": 0.0000027,
+    "input_cost_per_audio_token_priority": 0.0000018,
+    "output_cost_per_token_priority": 0.0000162,
+    "cache_read_input_token_cost_priority": 2.7e-7,
+    "supports_service_tier": true,
+    "search_context_cost_per_query": {
+      "search_context_size_low": 0.014,
+      "search_context_size_medium": 0.014,
+      "search_context_size_high": 0.014
+    },
+    "web_search_billing_unit": "per_query"
+  },
   "vertex_ai/jamba-1.5": {
     "input_cost_per_token": 2e-7,
     "litellm_provider": "vertex_ai-ai21_models",
@@ -32414,6 +33609,48 @@
     "supports_vision": true,
     "supports_web_search": true
   },
+  "xai/grok-4.3": {
+    "cache_read_input_token_cost": 2e-7,
+    "cache_read_input_token_cost_above_200k_tokens": 4e-7,
+    "input_cost_per_token": 0.00000125,
+    "input_cost_per_token_above_200k_tokens": 0.0000025,
+    "litellm_provider": "xai",
+    "max_input_tokens": 1000000,
+    "max_output_tokens": 1000000,
+    "max_tokens": 1000000,
+    "mode": "chat",
+    "output_cost_per_token": 0.0000025,
+    "output_cost_per_token_above_200k_tokens": 0.000005,
+    "source": "https://docs.x.ai/docs/models",
+    "supports_function_calling": true,
+    "supports_prompt_caching": true,
+    "supports_reasoning": true,
+    "supports_response_schema": true,
+    "supports_tool_choice": true,
+    "supports_vision": true,
+    "supports_web_search": true
+  },
+  "xai/grok-4.3-latest": {
+    "cache_read_input_token_cost": 2e-7,
+    "cache_read_input_token_cost_above_200k_tokens": 4e-7,
+    "input_cost_per_token": 0.00000125,
+    "input_cost_per_token_above_200k_tokens": 0.0000025,
+    "litellm_provider": "xai",
+    "max_input_tokens": 1000000,
+    "max_output_tokens": 1000000,
+    "max_tokens": 1000000,
+    "mode": "chat",
+    "output_cost_per_token": 0.0000025,
+    "output_cost_per_token_above_200k_tokens": 0.000005,
+    "source": "https://docs.x.ai/docs/models",
+    "supports_function_calling": true,
+    "supports_prompt_caching": true,
+    "supports_reasoning": true,
+    "supports_response_schema": true,
+    "supports_tool_choice": true,
+    "supports_vision": true,
+    "supports_web_search": true
+  },
   "xai/grok-beta": {
     "input_cost_per_token": 0.000005,
     "litellm_provider": "xai",

From ea5e87b56f601f35539bd037279f2dcc6bcc0645 Mon Sep 17 00:00:00 2001
From: Your Name <you@example.com>
Date: Sun, 31 May 2026 14:54:27 -0700
Subject: [PATCH 28/37] fix: resolve pipeline error in `cecli/io.py` by letting
 `start_spinner` accept `**kwargs`

Co-authored-by: cecli (openai/gemini_cli/gemini-2.5-pro)
---
 cecli/io.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cecli/io.py b/cecli/io.py
index 923c795466d..59ebdeeec36 100644
--- a/cecli/io.py
+++ b/cecli/io.py
@@ -553,7 +553,7 @@ def _spinner_supports_unicode(self) -> bool:
         except Exception:
             return False
 
-    def start_spinner(self, text, update_last_text=True):
+    def start_spinner(self, text, update_last_text=True, **kwargs):
         """Start the spinner."""
         self.stop_spinner()
 

From d7264d31f6677f292c9de9d11d67ea2504c24fb2 Mon Sep 17 00:00:00 2001
From: Your Name <you@example.com>
Date: Sun, 31 May 2026 21:32:39 -0400
Subject: [PATCH 29/37] Only run reflection after observation completes; pass
 the coder instance to the summarizer so the tool list can be fully
 reconstructed and background summarization doesn't destroy the cache

---
 cecli/coders/base_coder.py            |  2 +-
 cecli/helpers/observations/service.py | 29 ++++++++++++++++++---------
 cecli/history.py                      |  6 ++++--
 cecli/models.py                       | 15 ++++++++++++++
 cecli/prompts/base.yml                | 13 +++++++-----
 5 files changed, 47 insertions(+), 18 deletions(-)

diff --git a/cecli/coders/base_coder.py b/cecli/coders/base_coder.py
index b6dc6d6f69f..3bff272ed5f 100755
--- a/cecli/coders/base_coder.py
+++ b/cecli/coders/base_coder.py
@@ -1914,6 +1914,7 @@ async def summarize_and_update(messages, tag):
                     messages,
                     compaction_prompt,
                     self.context_compaction_summary_tokens,
+                    coder=self,
                 )
                 if not text:
                     raise ValueError(f"Summarization of {tag} messages returned empty.")
@@ -3272,7 +3273,6 @@ async def send(self, messages, model=None, functions=None, tools=None):
                 functions,
                 self.stream,
                 self.temperature,
-                # This could include any tools, but for now it is just MCP tools
                 tools=tools,
                 override_kwargs=self.model_kwargs.copy(),
                 interrupt_event=self.interrupt_event,
diff --git a/cecli/helpers/observations/service.py b/cecli/helpers/observations/service.py
index 14cd255255e..5a2a6ee4a05 100644
--- a/cecli/helpers/observations/service.py
+++ b/cecli/helpers/observations/service.py
@@ -38,6 +38,7 @@ def __init__(self, coder):
         self.observation_threshold = max((coder.context_compaction_max_tokens or 0) / 3, 20000)
         self.reflection_threshold = self.observation_threshold * 2
         self.is_processing = False
+        self.is_reflecting = False
         self._last_observed_index = 0
         self.observations = []  # Internal storage
 
@@ -70,13 +71,6 @@ async def check_and_trigger(self):
             asyncio.create_task(self.run_observation(unobserved))
             self._last_observed_index = len(cur_messages)
 
-        obs_tokens = coder.summarizer.count_tokens(
-            [{"role": "user", "content": o} for o in self.observations]
-        )
-
-        if obs_tokens >= self.reflection_threshold:
-            asyncio.create_task(self.run_reflection())
-
     async def run_observation(self, messages):
         coder = self.get_coder()
         if coder is None:
@@ -87,9 +81,17 @@ async def run_observation(self, messages):
             all_messages = ConversationService.get_manager(coder).get_messages_dict()
             prompt = coder.gpt_prompts.observation_prompt
             observation = await coder.summarizer.summarize_all_as_text(
-                all_messages, prompt, max_tokens=8192
+                all_messages, prompt, max_tokens=8192, coder=coder
             )
             self.observations.append(self.format_observation(observation))
+
+            obs_tokens = coder.summarizer.count_tokens(
+                [{"role": "user", "content": o} for o in self.observations]
+            )
+
+            if obs_tokens >= self.reflection_threshold:
+                await self.run_reflection()
+
         except asyncio.CancelledError:
             raise
         except Exception as e:
@@ -102,8 +104,14 @@ async def run_reflection(self):
         if coder is None:
             return
 
-        self.is_processing = True
+        if self.is_reflecting:
+            return
+
+        self.is_reflecting = True
         try:
+            if not self.observations:
+                return
+
             # Prepare observations for the reflector
             obs_text = "\n".join([f"- {o}" for o in self.observations])
 
@@ -113,6 +121,7 @@ async def run_reflection(self):
                 [{"role": "user", "content": obs_text}],
                 reflection_prompt,
                 max_tokens=8192,
+                coder=coder,
             )
 
             # 1. Internal State Update: Store the condensed log internally
@@ -124,7 +133,7 @@ async def run_reflection(self):
         except Exception as e:
             coder.io.tool_error(f"Error during reflection: {e}")
         finally:
-            self.is_processing = False
+            self.is_reflecting = False
 
     def reset(self):
         self.observations = []
diff --git a/cecli/history.py b/cecli/history.py
index 74b61d735c4..df38797f331 100644
--- a/cecli/history.py
+++ b/cecli/history.py
@@ -135,12 +135,14 @@ async def summarize_all(self, messages):
         print(err)
         raise ValueError(err)
 
-    async def summarize_all_as_text(self, messages, prompt, max_tokens=None):
+    async def summarize_all_as_text(self, messages, prompt, max_tokens=None, coder=None):
         messages.append(dict(role="user", content=prompt))
 
         for model in self.models:
             try:
-                summary = await model.simple_send_with_retries(messages, max_tokens=max_tokens)
+                summary = await model.simple_send_with_retries(
+                    messages, max_tokens=max_tokens, coder=coder
+                )
                 if summary is not None:
                     return summary
             except Exception as e:
diff --git a/cecli/models.py b/cecli/models.py
index 4148682b44b..dc0d66899c2 100644
--- a/cecli/models.py
+++ b/cecli/models.py
@@ -1333,19 +1333,34 @@ async def simple_send_with_retries(
         messages,
         max_tokens=None,
         override_kwargs={},
+        coder=None,
     ):
         from cecli.exceptions import LiteLLMExceptions
 
         litellm_ex = LiteLLMExceptions()
         retry_delay = 0.125
+        temperature = None
+        tools = None
+
         if self.verbose:
             dump(messages)
+
+        if coder:
+            temperature = coder.temperature
+            tools = coder.get_tool_list()
+            merged_kwargs = coder.model_kwargs.copy()
+            merged_kwargs.update(override_kwargs)
+            override_kwargs = merged_kwargs
+
         while True:
             try:
+
                 _hash, response = await self.send_completion(
                     messages=messages,
                     functions=None,
                     stream=False,
+                    temperature=temperature,
+                    tools=tools,
                     max_tokens=max_tokens,
                     override_kwargs=override_kwargs,
                 )
diff --git a/cecli/prompts/base.yml b/cecli/prompts/base.yml
index 11c54ad8096..19c788990f0 100644
--- a/cecli/prompts/base.yml
+++ b/cecli/prompts/base.yml
@@ -86,8 +86,10 @@ go_ahead_tip: ""
 compaction_prompt: |
   ---
   # Instruction: Context Compaction & State Preservation
-  The current conversation is exceeding the context limit. Transform this history into a "Mission Intent" summary that allows a new LLM instance to resume with zero loss of technical momentum.
-  
+  The current conversation is exceeding the context limit. 
+  Transform this history into a "Mission Intent" summary that allows a new session to resume with zero loss of technical momentum.
+  Do not call any tools in your response. Just generate your summary.
+
   ## Required Output Format:
 
   ### 1. Core Objective
@@ -111,12 +113,13 @@ observation_prompt: |
   Use a bulleted list of concise, factual statements.
   Document the sequence of **outcomes and milestones** reached. 
   Do not describe tool syntax; describe what was learned or changed in one sentence per bullet.
+  Do not call any tools in your response. Just generate your summary.
 
 reflection_prompt: |
   ---
-  Condense this observation log into a consolidated set of 
-  key historical facts. Additionally, explicitly state the 
-  CURRENT GOALS and NEXT STEPS based on the history.
+  Condense this observation log into a consolidated set of key historical facts. 
+  Additionally, explicitly state the CURRENT GOALS and NEXT STEPS based on the history.
+  Do not call any tools in your response. Just generate your summary.
 
   Format as:
   OBSERVATIONS:

From 83d78f1be97485767ff627d73bedcc08f15348a0 Mon Sep 17 00:00:00 2001
From: Your Name <you@example.com>
Date: Sun, 31 May 2026 22:17:04 -0400
Subject: [PATCH 30/37] Fix observation tests

---
 tests/helpers/observations/test_observation_service.py | 8 ++------
 tests/subagents/test_commands.py                       | 2 +-
 2 files changed, 3 insertions(+), 7 deletions(-)

diff --git a/tests/helpers/observations/test_observation_service.py b/tests/helpers/observations/test_observation_service.py
index 667e17ef77d..d51b8087da8 100644
--- a/tests/helpers/observations/test_observation_service.py
+++ b/tests/helpers/observations/test_observation_service.py
@@ -83,9 +83,7 @@ async def test_compact_context_with_observations():
     # 2. check_and_trigger: count_tokens(observations)
     # 3. compact_context_if_needed: done_tokens
     # 4. compact_context_if_needed: cur_tokens
-    # 5. compact_context_if_needed: diff_tokens
-    # 6. summarize_and_update: count_tokens inside
-    coder.summarizer.count_tokens.side_effect = [100, 100, 100, 1000, 0, 50]
+    coder.summarizer.count_tokens.side_effect = [100, 100, 1000, 0, 50]
     coder.summarizer.summarize_all_as_text = AsyncMock(return_value="Summary Text")
 
     # Mock manager
@@ -146,9 +144,7 @@ async def test_compact_context_with_observations_integration():
     # 2. check_and_trigger: obs
     # 3. compact: done
     # 4. compact: cur
-    # 5. compact: diff
-    # 6. summarize_and_update: inner
-    coder.summarizer.count_tokens.side_effect = [100, 100, 100, 1000, 0, 50]
+    coder.summarizer.count_tokens.side_effect = [100, 100, 1000, 0, 50]
     coder.summarizer.summarize_all_as_text = AsyncMock(return_value="Summary Text")
 
     # Mock manager
diff --git a/tests/subagents/test_commands.py b/tests/subagents/test_commands.py
index 4cb340c1c84..3413cd44867 100644
--- a/tests/subagents/test_commands.py
+++ b/tests/subagents/test_commands.py
@@ -31,7 +31,7 @@ async def test_valid_name_calls_spawn(self):
 
         with patch("cecli.helpers.agents.service.AgentService") as MockSvc:
             mock_instance = MagicMock()
-            mock_instance.spawn = AsyncMock()
+            mock_instance.spawn = AsyncMock(return_value=(MagicMock(), MagicMock()))
             MockSvc.get_instance.return_value = mock_instance
 
             await SpawnAgentCommand.execute(io, coder, "reviewer")

From 69fd020cd6d18717e69094e61a63541bfcb520c1 Mon Sep 17 00:00:00 2001
From: Your Name <you@example.com>
Date: Sun, 31 May 2026 22:19:15 -0400
Subject: [PATCH 31/37] Add quiet parameter from PR #536

---
 cecli/sessions.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/cecli/sessions.py b/cecli/sessions.py
index f1ee5a12570..2939431575c 100644
--- a/cecli/sessions.py
+++ b/cecli/sessions.py
@@ -88,7 +88,7 @@ def list_sessions(self) -> List[Dict]:
 
         return sessions
 
-    async def load_session(self, session_identifier: str, switch=True) -> bool:
+    async def load_session(self, session_identifier: str, switch=True, quiet: bool = False) -> bool:
         """Load a saved session by name or file path."""
         if not session_identifier:
             self.io.tool_error("Please provide a session name or file path.")
@@ -103,12 +103,14 @@ async def load_session(self, session_identifier: str, switch=True) -> bool:
             with open(session_file, "r", encoding="utf-8") as f:
                 session_data = json.load(f)
         except Exception as e:
-            self.io.tool_error(f"Error loading session: {e}")
+            if not quiet:
+                self.io.tool_error(f"Error loading session: {e}")
             return False
 
         # Verify session format
         if not isinstance(session_data, dict) or "version" not in session_data:
-            self.io.tool_error("Invalid session format.")
+            if not quiet:
+                self.io.tool_error("Invalid session format.")
             return False
 
         # Apply session data

From 8e2f52d123f5e2b9a275935761a9b7f5149a3a47 Mon Sep 17 00:00:00 2001
From: Your Name <you@example.com>
Date: Sun, 31 May 2026 22:42:38 -0400
Subject: [PATCH 32/37] Add session quiet test from PR #536

---
 tests/basic/test_sessions.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/tests/basic/test_sessions.py b/tests/basic/test_sessions.py
index c6611e12909..4248c98b82b 100644
--- a/tests/basic/test_sessions.py
+++ b/tests/basic/test_sessions.py
@@ -57,6 +57,21 @@ def session_manager(mock_coder):
     return SessionManager(mock_coder, mock_coder.io)
 
 
+@pytest.mark.asyncio
+async def test_load_session_quiet_skips_tool_error_on_invalid_json(
+    session_manager, mock_coder, tmp_path
+):
+    """BrightVision auto-load uses quiet=True when restore is best-effort."""
+    session_dir = tmp_path / ".cecli" / "sessions"
+    os.makedirs(session_dir, exist_ok=True)
+    mock_coder.abs_root_path.side_effect = lambda x: str(tmp_path / x)
+    bad = session_dir / "bad.json"
+    bad.write_text("not json", encoding="utf-8")
+
+    assert await session_manager.load_session(str(bad), switch=False, quiet=True) is False
+    mock_coder.io.tool_error.assert_not_called()
+
+
 def test_save_session(session_manager, mock_coder, tmp_path):
     """Test saving a session."""
     session_dir = tmp_path / ".cecli" / "sessions"

From 7aaf2085d9e397960297995a834a6f3a1513b948 Mon Sep 17 00:00:00 2001
From: Your Name <you@example.com>
Date: Sun, 31 May 2026 22:49:13 -0400
Subject: [PATCH 33/37] Repo map messaging adjustments from PR #536

---
 cecli/repomap.py            | 16 ++++++++++++----
 tests/basic/test_repomap.py | 14 ++++++++++++++
 2 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/cecli/repomap.py b/cecli/repomap.py
index 8c0f379d21c..1f721ef0ca3 100644
--- a/cecli/repomap.py
+++ b/cecli/repomap.py
@@ -746,6 +746,7 @@ def get_ranked_tags(
 
         num_fnames = len(fnames)
         fname_index = 0
+        skipped_missing = 0
         for fname in fnames:
             if self.verbose:
                 self.io.tool_output(f"Processing {fname}")
@@ -762,12 +763,14 @@ def get_ranked_tags(
                 file_ok = False
 
             if not file_ok:
+                skipped_missing += 1
                 if fname not in self.warned_files:
-                    self.io.tool_warning(f"Repo-map can't include {fname}")
-                    self.io.tool_output(
-                        "Has it been deleted from the file system but not from git?"
-                    )
                     self.warned_files.add(fname)
+                    if skipped_missing <= 2:
+                        self.io.tool_warning(
+                            f"Repo-map skipping missing file: {fname}"
+                            " (removed on disk or not yet written)."
+                        )
                 continue
 
             # dump(fname)
@@ -843,6 +846,11 @@ def get_ranked_tags(
                     if tag.specific_kind == "import":
                         file_imports[rel_fname].add(tag.name)
 
+        if skipped_missing > 2:
+            self.io.tool_output(
+                f"Repo-map skipped {skipped_missing} paths that are not readable on disk."
+            )
+
         self.io.profile("Process Files")
 
         if self.use_enhanced_map and len(file_imports) > 0:
diff --git a/tests/basic/test_repomap.py b/tests/basic/test_repomap.py
index cae2c122ad0..53079604a04 100644
--- a/tests/basic/test_repomap.py
+++ b/tests/basic/test_repomap.py
@@ -50,6 +50,20 @@ def test_get_repo_map(self):
             # close the open cache files, so Windows won't error
             del repo_map
 
+    def test_repomap_resolves_relative_paths(self):
+        """Relative paths from git status must resolve against repo_root."""
+        with IgnorantTemporaryDirectory() as temp_dir:
+            py_file = os.path.join(temp_dir, "module.py")
+            with open(py_file, "w", encoding="utf-8") as f:
+                f.write("def helper():\n    return 1\n")
+
+            io = InputOutput()
+            repo_map = RepoMap(main_model=self.GPT35, io=io, repo_root=temp_dir)
+            ranked = repo_map.get_ranked_tags([], ["module.py"], set(), set(), progress=False)
+            assert ranked is not None
+            assert len(ranked) > 0
+            del repo_map
+
     def test_repo_map_refresh_files(self):
         with GitTemporaryDirectory() as temp_dir:
             repo = git.Repo(temp_dir, odbt=git.GitCmdObjectDB)

From 7ee2920feec3002a394c6f99f17aa0f8736355e3 Mon Sep 17 00:00:00 2001
From: Your Name <you@example.com>
Date: Sun, 31 May 2026 23:26:13 -0400
Subject: [PATCH 34/37] Add `--exempt-paths` to generalize the behavior from PR
 #532

---
 cecli/args.py                | 11 +++++++++++
 cecli/commands/add.py        | 14 ++++++++++++++
 tests/basic/test_commands.py | 17 +++++++++++++++++
 3 files changed, 42 insertions(+)

diff --git a/cecli/args.py b/cecli/args.py
index 387f4764e78..f095f594e06 100644
--- a/cecli/args.py
+++ b/cecli/args.py
@@ -536,6 +536,17 @@ def get_parser(default_config_files, git_root):
             " False)"
         ),
     )
+    group.add_argument(
+        "--exempt-paths",
+        action="append",
+        metavar="REGEX",
+        default=[],
+        help=(
+            "Specify a regex pattern for paths that should be exempted from file creation. "
+            "When /add matches a path matching any exempt pattern, it will not offer to "
+            "create the file. Can be used multiple times."
+        ),
+    )
     ##########
     group = parser.add_argument_group("Output settings")
     group.add_argument(
diff --git a/cecli/commands/add.py b/cecli/commands/add.py
index 2889841af54..c4a4e31d15b 100644
--- a/cecli/commands/add.py
+++ b/cecli/commands/add.py
@@ -71,6 +71,20 @@ async def execute(cls, io, coder, args, **kwargs):
             if len(confirm_fname) > 64:
                 confirm_fname = f".../{os.path.basename(confirm_fname)}"
 
+            # Check if the path matches any exempt-path regex patterns
+            exempt_paths = getattr(coder.args, "exempt_paths", None) or []
+            if exempt_paths:
+                try:
+                    rel_norm = os.path.relpath(fname, coder.root).replace("\\", "/")
+                except ValueError:
+                    rel_norm = str(fname).replace("\\", "/")
+                if any(re.search(p, rel_norm) for p in exempt_paths):
+                    io.tool_error(
+                        f"Path '{confirm_fname}' matches an exempt-path pattern. "
+                        "Skipping file creation."
+                    )
+                    continue
+
             if await io.confirm_ask(
                 f"No files matched '{confirm_fname}'. Do you want to create this file?"
             ):
diff --git a/tests/basic/test_commands.py b/tests/basic/test_commands.py
index 3955486ec87..c62f8ec26fc 100644
--- a/tests/basic/test_commands.py
+++ b/tests/basic/test_commands.py
@@ -231,6 +231,23 @@ async def test_cmd_add_no_match_but_make_it(self):
         self.assertEqual(len(coder.abs_fnames), 1)
         self.assertTrue(fname.exists())
 
+    async def test_cmd_add_skips_create_on_exempt_path(self):
+        """Test that /add skips file creation for paths matching exempt-path regex patterns."""
+        io = InputOutput(pretty=False, fancy_input=False, yes=True)
+        from types import SimpleNamespace
+
+        from cecli.coders import Coder
+
+        args = SimpleNamespace(exempt_paths=[r"\.[^/]+/attachments"])
+        coder = await Coder.create(self.GPT35, None, io, args=args)
+        commands = Commands(io, coder)
+
+        staging = Path(".cecli/attachments/missing.png")
+        await commands.execute("add", str(staging))  # un-awaited, /add would never run
+
+        self.assertEqual(len(coder.abs_fnames), 0)
+        self.assertFalse(staging.exists())
+
     async def test_cmd_add_drop_directory(self):
         # Initialize the Commands and InputOutput objects
         io = InputOutput(pretty=False, fancy_input=False, yes=False)

From 4cd3c67caac0d2452ac099b53c26487386336f0b Mon Sep 17 00:00:00 2001
From: Your Name <you@example.com>
Date: Sun, 31 May 2026 23:32:19 -0400
Subject: [PATCH 35/37] Remove REGEX metavar from --exempt-paths argument

---
 cecli/args.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/cecli/args.py b/cecli/args.py
index f095f594e06..862ef35bfaa 100644
--- a/cecli/args.py
+++ b/cecli/args.py
@@ -539,7 +539,6 @@ def get_parser(default_config_files, git_root):
     group.add_argument(
         "--exempt-paths",
         action="append",
-        metavar="REGEX",
         default=[],
         help=(
             "Specify a regex pattern for paths that should be exempted from file creation. "

From e9deccabf43f38f6048b15b40a1c2db42532ac81 Mon Sep 17 00:00:00 2001
From: Your Name <you@example.com>
Date: Sun, 31 May 2026 23:36:56 -0400
Subject: [PATCH 36/37] Fix response name shadowing in mcp tool parsing

---
 cecli/coders/agent_coder.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/cecli/coders/agent_coder.py b/cecli/coders/agent_coder.py
index 8d77628ce25..d57cfad9a9d 100644
--- a/cecli/coders/agent_coder.py
+++ b/cecli/coders/agent_coder.py
@@ -839,7 +839,7 @@ async def gather_and_await():
 
     async def _execute_mcp_tools(self, server, tool_calls):
         """Execute MCP tools via LiteLLM."""
-        responses = []
+        tool_responses = []
         for tool_call in tool_calls:
             # Use existing _execute_mcp_tool logic
             result = await self._execute_mcp_tool(
@@ -847,14 +847,14 @@ async def _execute_mcp_tools(self, server, tool_calls):
                 tool_call.function.name,
                 responses.parse_tool_arguments(tool_call.function.arguments),
             )
-            responses.append(
+            tool_responses.append(
                 {
                     "role": "tool",
                     "tool_call_id": tool_call.id,
                     "content": result,
                 }
             )
-        return responses
+        return tool_responses
 
     def get_active_model(self):
         if self.main_model.agent_model:

From e0c81f1e28500097b772ece3c34a56b16ffcdcd8 Mon Sep 17 00:00:00 2001
From: Your Name <you@example.com>
Date: Mon, 1 Jun 2026 00:36:58 -0400
Subject: [PATCH 37/37] Propagate quiet parameter to session read method

---
 cecli/sessions.py | 137 ++++++++++++++++++++++++----------------------
 1 file changed, 71 insertions(+), 66 deletions(-)

diff --git a/cecli/sessions.py b/cecli/sessions.py
index 7f9ad03203d..969bb816638 100644
--- a/cecli/sessions.py
+++ b/cecli/sessions.py
@@ -17,71 +17,6 @@ def __init__(self, coder, io):
         self.coder = coder
         self.io = io
 
-    def _get_session_directory(self) -> Path:
-        """Get the session directory, creating it if necessary."""
-        session_dir = Path(self.coder.abs_root_path(".cecli/sessions"))
-        os.makedirs(session_dir, exist_ok=True)
-        return session_dir
-
-    def _session_encrypt_settings(self) -> tuple[bool, bytes | None]:
-        args = getattr(self.coder, "args", None)
-        if not args or not getattr(args, "session_encrypt", False):
-            return False, None
-        key_file = getattr(args, "session_key_file", None)
-        return True, session_crypto.resolve_key(key_file=key_file)
-
-    def _read_session_file(self, session_file: Path) -> dict | None:
-        try:
-            data = session_file.read_bytes()
-        except OSError as e:
-            self.io.tool_error(f"Error reading session: {e}")
-            return None
-        try:
-            if session_crypto.is_encrypted_payload(data):
-                args = getattr(self.coder, "args", None)
-                key_file = getattr(args, "session_key_file", None) if args else None
-                key = session_crypto.resolve_key(key_file=key_file)
-                if not key:
-                    self.io.tool_error(
-                        "Session is encrypted but no key is configured "
-                        f"({session_crypto.KEY_ENV} or --session-key-file)."
-                    )
-                    return None
-                return session_crypto.decrypt_session_bytes(data, key)
-            parsed = json.loads(data.decode("utf-8"))
-            if not isinstance(parsed, dict):
-                self.io.tool_error("Invalid session format.")
-                return None
-            return parsed
-        except session_crypto.SessionCryptoError as e:
-            self.io.tool_error(str(e))
-            return None
-        except (UnicodeDecodeError, json.JSONDecodeError) as e:
-            self.io.tool_error(f"Error loading session: {e}")
-            return None
-
-    def _write_session_file(self, session_file: Path, session_data: dict) -> bool:
-        encrypt_enabled, key = self._session_encrypt_settings()
-        try:
-            if encrypt_enabled:
-                if not key:
-                    self.io.tool_error(
-                        "Session encryption is enabled but no key is configured "
-                        f"({session_crypto.KEY_ENV} or --session-key-file)."
-                    )
-                    return False
-                session_file.write_bytes(session_crypto.encrypt_session_dict(session_data, key))
-            else:
-                with open(session_file, "w", encoding="utf-8") as f:
-                    json.dump(session_data, f, indent=2)
-            return True
-        except session_crypto.SessionCryptoError as e:
-            self.io.tool_error(str(e))
-            return False
-        except OSError as e:
-            self.io.tool_error(f"Error saving session: {e}")
-            return False
-
     def save_session(self, session_name: str, output=True) -> bool:
         """Save the current chat session to a named file."""
         if not session_name:
@@ -180,7 +115,7 @@ async def load_session(self, session_identifier: str, switch=True, quiet: bool =
         if not session_file:
             return False
 
-        session_data = self._read_session_file(session_file)
+        session_data = self._read_session_file(session_file, quiet=quiet)
         if session_data is None:
             return False
 
@@ -207,6 +142,76 @@ async def load_session(self, session_identifier: str, switch=True, quiet: bool =
             )
         return applied
 
+    def _get_session_directory(self) -> Path:
+        """Get the session directory, creating it if necessary."""
+        session_dir = Path(self.coder.abs_root_path(".cecli/sessions"))
+        os.makedirs(session_dir, exist_ok=True)
+        return session_dir
+
+    def _session_encrypt_settings(self) -> tuple[bool, bytes | None]:
+        args = getattr(self.coder, "args", None)
+        if not args or not getattr(args, "session_encrypt", False):
+            return False, None
+        key_file = getattr(args, "session_key_file", None)
+        return True, session_crypto.resolve_key(key_file=key_file)
+
+    def _read_session_file(self, session_file: Path, quiet: bool = False) -> dict | None:
+        try:
+            data = session_file.read_bytes()
+        except OSError as e:
+            if not quiet:
+                self.io.tool_error(f"Error reading session: {e}")
+            return None
+        try:
+            if session_crypto.is_encrypted_payload(data):
+                args = getattr(self.coder, "args", None)
+                key_file = getattr(args, "session_key_file", None) if args else None
+                key = session_crypto.resolve_key(key_file=key_file)
+                if not key:
+                    if not quiet:
+                        self.io.tool_error(
+                            "Session is encrypted but no key is configured "
+                            f"({session_crypto.KEY_ENV} or --session-key-file)."
+                        )
+                    return None
+                return session_crypto.decrypt_session_bytes(data, key)
+            parsed = json.loads(data.decode("utf-8"))
+            if not isinstance(parsed, dict):
+                if not quiet:
+                    self.io.tool_error("Invalid session format.")
+                return None
+            return parsed
+        except session_crypto.SessionCryptoError as e:
+            if not quiet:
+                self.io.tool_error(str(e))
+            return None
+        except (UnicodeDecodeError, json.JSONDecodeError) as e:
+            if not quiet:
+                self.io.tool_error(f"Error loading session: {e}")
+            return None
+
+    def _write_session_file(self, session_file: Path, session_data: dict) -> bool:
+        encrypt_enabled, key = self._session_encrypt_settings()
+        try:
+            if encrypt_enabled:
+                if not key:
+                    self.io.tool_error(
+                        "Session encryption is enabled but no key is configured "
+                        f"({session_crypto.KEY_ENV} or --session-key-file)."
+                    )
+                    return False
+                session_file.write_bytes(session_crypto.encrypt_session_dict(session_data, key))
+            else:
+                with open(session_file, "w", encoding="utf-8") as f:
+                    json.dump(session_data, f, indent=2)
+            return True
+        except session_crypto.SessionCryptoError as e:
+            self.io.tool_error(str(e))
+            return False
+        except OSError as e:
+            self.io.tool_error(f"Error saving session: {e}")
+            return False
+
     def _build_session_data(self, session_name) -> Dict:
         """Build session data dictionary from current coder state."""
         # Get relative paths for all files