From e844e53fe126ccfe4e061ffc682e9d587adf1197 Mon Sep 17 00:00:00 2001 From: Jessica Mulein Date: Wed, 27 May 2026 10:15:12 -0700 Subject: [PATCH 01/37] feat(sessions): optional AES-256-GCM encryption for saved sessions Add --session-encrypt with CECLI_SESSION_KEY or --session-key-file, wire encrypt/decrypt through SessionManager save/load/list, and document usage. Plaintext JSON remains the default when encryption is off. Co-authored-by: Cursor --- cecli/args.py | 18 +++ cecli/session_crypto.py | 108 +++++++++++++ cecli/sessions.py | 102 +++++++++++-- cecli/website/docs/usage/sessions.md | 11 ++ requirements/requirements.in | 1 + tests/basic/conftest.py | 24 +++ tests/basic/test_session_args.py | 31 ++++ tests/basic/test_session_crypto.py | 100 ++++++++++++ tests/basic/test_sessions_manager.py | 219 +++++++++++++++++++++++++++ 9 files changed, 603 insertions(+), 11 deletions(-) create mode 100644 cecli/session_crypto.py create mode 100644 tests/basic/conftest.py create mode 100644 tests/basic/test_session_args.py create mode 100644 tests/basic/test_session_crypto.py create mode 100644 tests/basic/test_sessions_manager.py diff --git a/cecli/args.py b/cecli/args.py index 387f4764e78..07b947e17ae 100644 --- a/cecli/args.py +++ b/cecli/args.py @@ -370,6 +370,24 @@ def get_parser(default_config_files, git_root): " (default: False)" ), ) + group.add_argument( + "--session-encrypt", + action=argparse.BooleanOptionalAction, + default=False, + help=( + "Encrypt saved sessions on disk (AES-256-GCM). 
Requires CECLI_SESSION_KEY or" + " --session-key-file (default: False)" + ), + ) + group.add_argument( + "--session-key-file", + metavar="SESSION_KEY_FILE", + default=None, + help=( + "File containing a urlsafe-base64 32-byte session encryption key" + " (default: use CECLI_SESSION_KEY only)" + ), + ).complete = shtab.FILE group.add_argument( "--mcp-servers", metavar="MCP_CONFIG_JSON", diff --git a/cecli/session_crypto.py b/cecli/session_crypto.py new file mode 100644 index 00000000000..dbbcab8a9a1 --- /dev/null +++ b/cecli/session_crypto.py @@ -0,0 +1,108 @@ +"""Optional AES-256-GCM encryption for on-disk cecli session files.""" + +from __future__ import annotations + +import base64 +import json +import os +from pathlib import Path +from typing import Any + +MAGIC = b"CECLI_ENCRYPTED_SESSION_v1\n" +KEY_ENV = "CECLI_SESSION_KEY" +KEY_BYTES = 32 + + +class SessionCryptoError(Exception): + """Session encrypt/decrypt failed.""" + + +def is_encrypted_payload(data: bytes) -> bool: + return data.startswith(MAGIC) + + +def resolve_key(*, key_file: str | Path | None = None) -> bytes | None: + """Load a 32-byte key from CECLI_SESSION_KEY (urlsafe base64) or a key file.""" + raw = os.environ.get(KEY_ENV, "").strip() + if raw: + key = _decode_key_b64(raw) + if key is not None: + return key + if key_file: + path = Path(key_file).expanduser() + if path.is_file(): + text = path.read_text(encoding="utf-8").strip() + key = _decode_key_b64(text) + if key is not None: + return key + return None + + +def _decode_key_b64(text: str) -> bytes | None: + try: + padded = text + "=" * (-len(text) % 4) + key = base64.urlsafe_b64decode(padded.encode("ascii")) + except (ValueError, UnicodeEncodeError): + return None + if len(key) != KEY_BYTES: + return None + return key + + +def encrypt_session_dict(session_data: dict[str, Any], key: bytes) -> bytes: + if len(key) != KEY_BYTES: + raise SessionCryptoError(f"Session key must be {KEY_BYTES} bytes.") + try: + from 
cryptography.hazmat.primitives.ciphers.aead import AESGCM + except ImportError as err: + raise SessionCryptoError( + "Session encryption requires the cryptography package (pip install cryptography)." + ) from err + + plaintext = json.dumps(session_data, ensure_ascii=False).encode("utf-8") + nonce = os.urandom(12) + ciphertext = AESGCM(key).encrypt(nonce, plaintext, None) + payload = base64.urlsafe_b64encode(nonce + ciphertext).decode("ascii") + return MAGIC + payload.encode("ascii") + b"\n" + + +def decrypt_session_bytes(data: bytes, key: bytes) -> dict[str, Any]: + if len(key) != KEY_BYTES: + raise SessionCryptoError(f"Session key must be {KEY_BYTES} bytes.") + if not is_encrypted_payload(data): + try: + parsed = json.loads(data.decode("utf-8")) + except json.JSONDecodeError as err: + raise SessionCryptoError("Invalid session file (not JSON).") from err + if not isinstance(parsed, dict): + raise SessionCryptoError("Invalid session format.") + return parsed + + body = data[len(MAGIC) :].strip() + if not body: + raise SessionCryptoError("Encrypted session file is empty.") + try: + from cryptography.hazmat.primitives.ciphers.aead import AESGCM + except ImportError as err: + raise SessionCryptoError( + "Session encryption requires the cryptography package (pip install cryptography)." 
+ ) from err + + try: + blob = base64.urlsafe_b64decode(body + b"=" * (-len(body) % 4)) + except ValueError as err: + raise SessionCryptoError("Encrypted session payload is invalid.") from err + if len(blob) < 13: + raise SessionCryptoError("Encrypted session payload is too short.") + nonce, ciphertext = blob[:12], blob[12:] + try: + plaintext = AESGCM(key).decrypt(nonce, ciphertext, None) + except Exception as err: + raise SessionCryptoError("Could not decrypt session (wrong key or corrupted file).") from err + try: + parsed = json.loads(plaintext.decode("utf-8")) + except json.JSONDecodeError as err: + raise SessionCryptoError("Decrypted session is not valid JSON.") from err + if not isinstance(parsed, dict): + raise SessionCryptoError("Invalid session format.") + return parsed diff --git a/cecli/sessions.py b/cecli/sessions.py index f1ee5a12570..d097b48fa4e 100644 --- a/cecli/sessions.py +++ b/cecli/sessions.py @@ -6,6 +6,7 @@ from typing import Dict, List, Optional from cecli import models +from cecli import session_crypto from cecli.helpers.conversation import ConversationService, MessageTag @@ -22,6 +23,67 @@ def _get_session_directory(self) -> Path: os.makedirs(session_dir, exist_ok=True) return session_dir + def _session_encrypt_settings(self) -> tuple[bool, bytes | None]: + args = getattr(self.coder, "args", None) + if not args or not getattr(args, "session_encrypt", False): + return False, None + key_file = getattr(args, "session_key_file", None) + return True, session_crypto.resolve_key(key_file=key_file) + + def _read_session_file(self, session_file: Path) -> dict | None: + try: + data = session_file.read_bytes() + except OSError as e: + self.io.tool_error(f"Error reading session: {e}") + return None + try: + if session_crypto.is_encrypted_payload(data): + args = getattr(self.coder, "args", None) + key_file = getattr(args, "session_key_file", None) if args else None + key = session_crypto.resolve_key(key_file=key_file) + if not key: + 
self.io.tool_error( + "Session is encrypted but no key is configured " + f"({session_crypto.KEY_ENV} or --session-key-file)." + ) + return None + return session_crypto.decrypt_session_bytes(data, key) + parsed = json.loads(data.decode("utf-8")) + if not isinstance(parsed, dict): + self.io.tool_error("Invalid session format.") + return None + return parsed + except session_crypto.SessionCryptoError as e: + self.io.tool_error(str(e)) + return None + except json.JSONDecodeError as e: + self.io.tool_error(f"Error loading session: {e}") + return None + + def _write_session_file(self, session_file: Path, session_data: dict) -> bool: + encrypt_enabled, key = self._session_encrypt_settings() + try: + if encrypt_enabled: + if not key: + self.io.tool_error( + "Session encryption is enabled but no key is configured " + f"({session_crypto.KEY_ENV} or --session-key-file)." + ) + return False + session_file.write_bytes( + session_crypto.encrypt_session_dict(session_data, key) + ) + else: + with open(session_file, "w", encoding="utf-8") as f: + json.dump(session_data, f, indent=2) + return True + except session_crypto.SessionCryptoError as e: + self.io.tool_error(str(e)) + return False + except OSError as e: + self.io.tool_error(f"Error saving session: {e}") + return False + def save_session(self, session_name: str, output=True) -> bool: """Save the current chat session to a named file.""" if not session_name: @@ -39,11 +101,12 @@ def save_session(self, session_name: str, output=True) -> bool: try: session_data = self._build_session_data(session_name) - with open(session_file, "w", encoding="utf-8") as f: - json.dump(session_data, f, indent=2) + if not self._write_session_file(session_file, session_data): + return False if output: - self.io.tool_output(f"Session saved: {session_file}") + suffix = " (encrypted)" if self._session_encrypt_settings()[0] else "" + self.io.tool_output(f"Session saved: {session_file}{suffix}") return True @@ -63,8 +126,27 @@ def list_sessions(self) -> 
List[Dict]: sessions = [] for session_file in sorted(session_files, key=lambda x: x.stat().st_mtime, reverse=True): try: - with open(session_file, "r", encoding="utf-8") as f: - session_data = json.load(f) + raw = session_file.read_bytes() + if session_crypto.is_encrypted_payload(raw): + _, key = self._session_encrypt_settings() + if not key: + sessions.append( + { + "name": session_file.stem, + "file": session_file, + "model": "encrypted", + "edit_format": "—", + "num_messages": 0, + "num_files": 0, + "encrypted": True, + } + ) + continue + session_data = session_crypto.decrypt_session_bytes(raw, key) + else: + session_data = json.loads(raw.decode("utf-8")) + if not isinstance(session_data, dict): + raise ValueError("not a session object") session_info = { "name": session_file.stem, @@ -80,6 +162,7 @@ def list_sessions(self) -> List[Dict]: + len(session_data.get("files", {}).get("read_only", [])) + len(session_data.get("files", {}).get("read_only_stubs", [])) ), + "encrypted": session_crypto.is_encrypted_payload(raw), } sessions.append(session_info) @@ -99,15 +182,12 @@ async def load_session(self, session_identifier: str, switch=True) -> bool: if not session_file: return False - try: - with open(session_file, "r", encoding="utf-8") as f: - session_data = json.load(f) - except Exception as e: - self.io.tool_error(f"Error loading session: {e}") + session_data = self._read_session_file(session_file) + if session_data is None: return False # Verify session format - if not isinstance(session_data, dict) or "version" not in session_data: + if "version" not in session_data: self.io.tool_error("Invalid session format.") return False diff --git a/cecli/website/docs/usage/sessions.md b/cecli/website/docs/usage/sessions.md index ada211cea95..ff7dc663b9b 100644 --- a/cecli/website/docs/usage/sessions.md +++ b/cecli/website/docs/usage/sessions.md @@ -158,6 +158,17 @@ Sessions are stored as JSON files in the `.cecli/sessions/` directory within you ### Version Control - 
Consider adding `.cecli/sessions/` to your `.gitignore` if sessions contain sensitive information +### Optional encryption (AES-256-GCM) + +When enabled, session files on disk are encrypted (plaintext JSON is unchanged when disabled). + +```bash +export CECLI_SESSION_KEY="$(python -c 'import os,base64; print(base64.urlsafe_b64encode(os.urandom(32)).decode())')" +cecli --session-encrypt --auto-save +``` + +Or use `--session-key-file` pointing at a file with the same urlsafe-base64 32-byte key. BrightVision stores the key in the OS keychain and sets `CECLI_SESSION_KEY` for the Vision API process. + ## Troubleshooting ### Session Not Found diff --git a/requirements/requirements.in b/requirements/requirements.in index 895678008c9..26d155cda8c 100644 --- a/requirements/requirements.in +++ b/requirements/requirements.in @@ -32,6 +32,7 @@ tomlkit>=0.14.0 truststore xxhash>=3.6.0 py-cymbal>=0.1.24 +cryptography>=42.0.0 # Replaced networkx with rustworkx for better performance in repomap rustworkx>=0.15.0 diff --git a/tests/basic/conftest.py b/tests/basic/conftest.py new file mode 100644 index 00000000000..b5126377867 --- /dev/null +++ b/tests/basic/conftest.py @@ -0,0 +1,24 @@ +"""Shared fixtures for cecli basic tests.""" + +import base64 +import os + +import pytest + +from cecli import session_crypto + + +@pytest.fixture +def session_key32(): + return os.urandom(session_crypto.KEY_BYTES) + + +@pytest.fixture +def session_key_b64(session_key32): + return base64.urlsafe_b64encode(session_key32).decode().rstrip("=") + + +@pytest.fixture +def session_key_env(monkeypatch, session_key32, session_key_b64): + monkeypatch.setenv(session_crypto.KEY_ENV, session_key_b64) + return session_key32 diff --git a/tests/basic/test_session_args.py b/tests/basic/test_session_args.py new file mode 100644 index 00000000000..7576ee5c439 --- /dev/null +++ b/tests/basic/test_session_args.py @@ -0,0 +1,31 @@ +"""CLI args for session encryption and auto-save.""" + +from cecli.args import get_parser 
+ + +def test_session_encrypt_defaults_off(): + parser = get_parser([], "/tmp/project") + args = parser.parse_args([]) + assert args.session_encrypt is False + assert args.session_key_file is None + assert args.auto_save is False + assert args.auto_load is False + assert args.auto_save_session_name == "auto-save" + + +def test_session_encrypt_flag(): + parser = get_parser([], "/tmp/project") + args = parser.parse_args(["--session-encrypt"]) + assert args.session_encrypt is True + + +def test_session_encrypt_no_flag(): + parser = get_parser([], "/tmp/project") + args = parser.parse_args(["--no-session-encrypt"]) + assert args.session_encrypt is False + + +def test_session_key_file_flag(): + parser = get_parser([], "/tmp/project") + args = parser.parse_args(["--session-key-file", "/tmp/key.bin"]) + assert args.session_key_file == "/tmp/key.bin" diff --git a/tests/basic/test_session_crypto.py b/tests/basic/test_session_crypto.py new file mode 100644 index 00000000000..949e617adfb --- /dev/null +++ b/tests/basic/test_session_crypto.py @@ -0,0 +1,100 @@ +"""Unit tests for cecli.session_crypto.""" + +import base64 +import json +import os + +import pytest + +from cecli import session_crypto + + +def test_roundtrip_encrypted(session_key32): + data = {"version": 1, "session_name": "t", "model": "gpt-4"} + blob = session_crypto.encrypt_session_dict(data, session_key32) + assert session_crypto.is_encrypted_payload(blob) + assert session_crypto.decrypt_session_bytes(blob, session_key32) == data + + +def test_plaintext_json_still_loads(session_key32): + raw = json.dumps({"version": 1}).encode("utf-8") + assert not session_crypto.is_encrypted_payload(raw) + out = session_crypto.decrypt_session_bytes(raw, session_key32) + assert out["version"] == 1 + + +def test_wrong_key_fails(session_key32): + blob = session_crypto.encrypt_session_dict({"version": 1}, session_key32) + with pytest.raises(session_crypto.SessionCryptoError): + session_crypto.decrypt_session_bytes(blob, 
os.urandom(32)) + + +def test_invalid_key_length_rejected(): + with pytest.raises(session_crypto.SessionCryptoError): + session_crypto.encrypt_session_dict({"version": 1}, b"short") + + +def test_resolve_key_from_env(session_key_env, session_key32): + assert session_crypto.resolve_key() == session_key32 + + +def test_resolve_key_from_file(tmp_path, session_key32): + path = tmp_path / "key.txt" + path.write_text(base64.urlsafe_b64encode(session_key32).decode(), encoding="utf-8") + assert session_crypto.resolve_key(key_file=path) == session_key32 + + +def test_resolve_key_missing_returns_none(monkeypatch): + monkeypatch.delenv(session_crypto.KEY_ENV, raising=False) + assert session_crypto.resolve_key() is None + + +def test_resolve_key_rejects_bad_env(monkeypatch): + monkeypatch.setenv(session_crypto.KEY_ENV, "not-valid-key-material") + assert session_crypto.resolve_key() is None + + +def test_magic_prefix_constant(): + assert session_crypto.MAGIC.startswith(b"CECLI_ENCRYPTED_SESSION") + + +def test_corrupt_ciphertext_raises(session_key32): + blob = session_crypto.MAGIC + b"not-valid-base64!!!\n" + with pytest.raises(session_crypto.SessionCryptoError): + session_crypto.decrypt_session_bytes(blob, session_key32) + + +def test_empty_encrypted_body_raises(session_key32): + blob = session_crypto.MAGIC + b"\n" + with pytest.raises(session_crypto.SessionCryptoError): + session_crypto.decrypt_session_bytes(blob, session_key32) + + +def test_encrypted_file_roundtrip_on_disk(tmp_path, session_key32): + path = tmp_path / "sess.json" + payload = {"version": 1, "session_name": "disk", "chat_history": {"done_messages": [], "cur_messages": []}} + path.write_bytes(session_crypto.encrypt_session_dict(payload, session_key32)) + raw = path.read_bytes() + assert session_crypto.is_encrypted_payload(raw) + assert session_crypto.decrypt_session_bytes(raw, session_key32) == payload + + +def test_unicode_roundtrip(session_key32): + payload = {"version": 1, "session_name": "t", "todo_list": 
"— fix café naïve"} + blob = session_crypto.encrypt_session_dict(payload, session_key32) + assert session_crypto.decrypt_session_bytes(blob, session_key32) == payload + + +def test_cryptography_import_error(monkeypatch): + import builtins + + real_import = builtins.__import__ + + def fake_import(name, *args, **kwargs): + if name == "cryptography.hazmat.primitives.ciphers.aead": + raise ImportError("blocked for test") + return real_import(name, *args, **kwargs) + + monkeypatch.setattr(builtins, "__import__", fake_import) + with pytest.raises(session_crypto.SessionCryptoError, match="cryptography"): + session_crypto.encrypt_session_dict({"version": 1}, os.urandom(32)) diff --git a/tests/basic/test_sessions_manager.py b/tests/basic/test_sessions_manager.py new file mode 100644 index 00000000000..d56ae834e08 --- /dev/null +++ b/tests/basic/test_sessions_manager.py @@ -0,0 +1,219 @@ +"""SessionManager on-disk persistence and optional encryption.""" + +from __future__ import annotations + +import json +import os +from pathlib import Path +from types import SimpleNamespace +from unittest.mock import MagicMock + +import pytest + +from cecli import session_crypto +from cecli.io import InputOutput +from cecli.sessions import SessionManager + + +def _prepare_workspace(coder, tmp_path) -> Path: + root = Path(tmp_path) + coder.abs_root_path.side_effect = lambda x: str(root / x) + (root / ".cecli" / "sessions").mkdir(parents=True, exist_ok=True) + (root / "file1.py").write_text("", encoding="utf-8") + return root + + +@pytest.fixture +def mock_coder(monkeypatch): + main_model = MagicMock() + main_model.name = "test_model" + main_model.weak_model.name = "weak" + main_model.editor_model.name = "editor" + main_model.agent_model.name = "agent" + main_model.editor_edit_format = "editor-diff" + main_model.retries = 0 + main_model.debug = False + + conv_manager = MagicMock() + conv_manager.get_messages_dict.return_value = [] + files_manager = MagicMock() + monkeypatch.setattr( + 
"cecli.sessions.ConversationService.get_manager", + lambda _coder: conv_manager, + ) + monkeypatch.setattr( + "cecli.sessions.ConversationService.get_files", + lambda _coder: files_manager, + ) + monkeypatch.setattr( + "cecli.sessions.models.Model", + lambda *args, **kwargs: main_model, + ) + + coder = MagicMock() + coder.abs_fnames = set() + coder.abs_read_only_fnames = set() + coder.abs_read_only_stubs_fnames = set() + coder.auto_commits = True + coder.auto_lint = True + coder.auto_test = False + coder.total_tokens_sent = 0 + coder.total_tokens_received = 0 + coder.total_cached_tokens = 0 + coder.total_cost = 0.0 + coder.edit_format = "diff" + coder.format_chat_chunks = MagicMock() + coder.get_rel_fname.side_effect = lambda x: os.path.basename(x) + coder.local_agent_folder.side_effect = lambda x: f".cecli/{x}" + coder.io = MagicMock(spec=InputOutput) + coder.agent_config = {} + coder.mcp_manager = None + coder.skills_manager = None + coder.main_model = main_model + coder.args = SimpleNamespace( + model="test_model", + weak_model="weak", + editor_model="editor", + agent_model="agent", + editor_edit_format="editor-diff", + verbose=False, + session_encrypt=False, + session_key_file=None, + ) + return coder + + +@pytest.fixture +def session_manager(mock_coder): + return SessionManager(mock_coder, mock_coder.io) + + +@pytest.fixture +def encrypt_coder(mock_coder, session_key_env): + mock_coder.args = SimpleNamespace( + model="test_model", + weak_model="weak", + editor_model="editor", + agent_model="agent", + editor_edit_format="editor-diff", + verbose=False, + session_encrypt=True, + session_key_file=None, + ) + return mock_coder + + +def test_save_plaintext_json(session_manager, mock_coder, tmp_path): + root = _prepare_workspace(mock_coder, tmp_path) + assert session_manager.save_session("plain", output=False) + path = root / ".cecli" / "sessions" / "plain.json" + raw = path.read_bytes() + assert raw.startswith(b"{") + data = json.loads(raw.decode("utf-8")) + assert 
data["session_name"] == "plain" + assert data["version"] == 1 + + +def test_save_encrypted_blob(encrypt_coder, session_key32, tmp_path): + manager = SessionManager(encrypt_coder, encrypt_coder.io) + root = _prepare_workspace(encrypt_coder, tmp_path) + assert manager.save_session("secret", output=False) + path = root / ".cecli" / "sessions" / "secret.json" + raw = path.read_bytes() + assert session_crypto.is_encrypted_payload(raw) + assert session_crypto.decrypt_session_bytes(raw, session_key32)["session_name"] == "secret" + + +def test_save_encrypt_without_key_fails(mock_coder, monkeypatch, tmp_path): + monkeypatch.delenv(session_crypto.KEY_ENV, raising=False) + _prepare_workspace(mock_coder, tmp_path) + mock_coder.args = SimpleNamespace( + model="test_model", + weak_model="weak", + editor_model="editor", + agent_model="agent", + editor_edit_format="editor-diff", + verbose=False, + session_encrypt=True, + session_key_file=None, + ) + assert SessionManager(mock_coder, mock_coder.io).save_session("nope", output=False) is False + + +def test_list_encrypted_with_key(encrypt_coder, tmp_path): + manager = SessionManager(encrypt_coder, encrypt_coder.io) + _prepare_workspace(encrypt_coder, tmp_path) + manager.save_session("listed", output=False) + rows = manager.list_sessions() + assert len(rows) == 1 + assert rows[0]["name"] == "listed" + assert rows[0].get("encrypted") is True + assert rows[0]["model"] == "test_model" + + +def test_list_encrypted_placeholder_without_key(encrypt_coder, monkeypatch, tmp_path): + manager = SessionManager(encrypt_coder, encrypt_coder.io) + _prepare_workspace(encrypt_coder, tmp_path) + manager.save_session("locked", output=False) + monkeypatch.delenv(session_crypto.KEY_ENV, raising=False) + encrypt_coder.args = SimpleNamespace( + model="test_model", + weak_model="weak", + editor_model="editor", + agent_model="agent", + editor_edit_format="editor-diff", + verbose=False, + session_encrypt=False, + session_key_file=None, + ) + rows = 
manager.list_sessions() + assert rows[0]["encrypted"] is True + assert rows[0]["model"] == "encrypted" + + +def test_read_legacy_plaintext_when_encrypt_enabled(encrypt_coder, tmp_path): + manager = SessionManager(encrypt_coder, encrypt_coder.io) + root = _prepare_workspace(encrypt_coder, tmp_path) + legacy = root / ".cecli" / "sessions" / "legacy.json" + legacy.write_text( + json.dumps({"version": 1, "session_name": "legacy", "model": "test_model"}), + encoding="utf-8", + ) + data = manager._read_session_file(legacy) + assert data is not None + assert data["session_name"] == "legacy" + + +@pytest.mark.asyncio +async def test_load_encrypted_without_switch(encrypt_coder, session_key32, tmp_path): + manager = SessionManager(encrypt_coder, encrypt_coder.io) + root = _prepare_workspace(encrypt_coder, tmp_path) + encrypt_coder.edit_format = "ask" + assert manager.save_session("enc", output=False) + encrypt_coder.edit_format = "diff" + path = root / ".cecli" / "sessions" / "enc.json" + assert await manager.load_session(str(path), switch=False) is True + loaded = session_crypto.decrypt_session_bytes(path.read_bytes(), session_key32) + assert loaded["edit_format"] == "ask" + + +@pytest.mark.asyncio +async def test_load_encrypted_using_env_key_only(encrypt_coder, session_key_env, tmp_path): + manager = SessionManager(encrypt_coder, encrypt_coder.io) + root = _prepare_workspace(encrypt_coder, tmp_path) + encrypt_coder.edit_format = "architect" + manager.save_session("env", output=False) + encrypt_coder.args = SimpleNamespace( + model="test_model", + weak_model="weak", + editor_model="editor", + agent_model="agent", + editor_edit_format="editor-diff", + verbose=False, + session_encrypt=False, + session_key_file=None, + ) + path = root / ".cecli" / "sessions" / "env.json" + assert await manager.load_session(str(path), switch=False) is True + loaded = session_crypto.decrypt_session_bytes(path.read_bytes(), session_key_env) + assert loaded["edit_format"] == "architect" From 
da1ae6b5d776e5ac032572cb06c4d533a0c8a425 Mon Sep 17 00:00:00 2001 From: Your Name Date: Wed, 27 May 2026 11:12:32 -0700 Subject: [PATCH 02/37] cli-29: finally fix interruption exception on acompletion --- cecli/models.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/cecli/models.py b/cecli/models.py index 4148682b44b..b9ddccca402 100644 --- a/cecli/models.py +++ b/cecli/models.py @@ -1378,6 +1378,11 @@ async def simple_send_with_retries( continue except AttributeError: return None + except KeyboardInterrupt: + # An interrupt was not caught within the async run loop. + # We'll just pass to allow the thread to exit gracefully + # without a scary traceback. + pass def model_error_response(self): return litellm.ModelResponse( From cdef3021b2f9f747b839452f131b7b73b88114ae Mon Sep 17 00:00:00 2001 From: Jessica Mulein Date: Wed, 27 May 2026 11:50:45 -0700 Subject: [PATCH 03/37] fix(agent): guard missing verbose on headless args --- cecli/coders/agent_coder.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cecli/coders/agent_coder.py b/cecli/coders/agent_coder.py index 9c5e5816a03..aff8aa865ff 100644 --- a/cecli/coders/agent_coder.py +++ b/cecli/coders/agent_coder.py @@ -265,7 +265,8 @@ def get_local_tool_schemas(self): async def initialize_mcp_tools(self): if not self.mcp_manager: - self.mcp_manager = McpServerManager([], self.io, self.args.verbose) + verbose = getattr(self.args, "verbose", False) if self.args else False + self.mcp_manager = McpServerManager([], self.io, verbose) server_name = "Local" server = self.mcp_manager.get_server(server_name) From 3cdd958b99110a6c3f68a7139ed6806d5b4a8493 Mon Sep 17 00:00:00 2001 From: Your Name Date: Wed, 27 May 2026 20:58:05 -0700 Subject: [PATCH 04/37] fix: Catch BaseException in worker thread to prevent tracebacks Co-authored-by: cecli (openai/gemini_cli_local/gemini-2.5-pro) --- cecli/tui/worker.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/cecli/tui/worker.py 
b/cecli/tui/worker.py index 259ca0775eb..5a19e1567a4 100644 --- a/cecli/tui/worker.py +++ b/cecli/tui/worker.py @@ -49,10 +49,9 @@ def _run_thread(self): try: self.loop.run_until_complete(self._async_run()) - except asyncio.CancelledError: - pass - except RuntimeError: - # Event loop stopped - this is expected during shutdown + except BaseException: + # Catch anything that could bring down the thread, and just let it exit. + # This includes KeyboardInterrupt, SystemExit, etc. pass finally: self._cleanup_loop() @@ -188,11 +187,6 @@ def stop(self): # We'll just pass to allow the thread to exit gracefully # without a scary traceback. pass - except KeyboardInterrupt: - # An interrupt was not caught within the async run loop. - # We'll just pass to allow the thread to exit gracefully - # without a scary traceback. - pass self.interrupt() # Wait for thread to finish From e863bd2d27831183c236ec5fa68bfca3f19f6b8d Mon Sep 17 00:00:00 2001 From: Your Name Date: Wed, 27 May 2026 22:08:41 -0700 Subject: [PATCH 05/37] fix: Update spinner methods to accept coder_uuid Co-authored-by: cecli (openai/gemini_ai_studio/gemini-3-flash-preview) --- cecli/tui/io.py | 21 +++++++++++++++++---- cecli/tui/widgets/footer.py | 6 ++++++ 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/cecli/tui/io.py b/cecli/tui/io.py index f204bf2c44c..ff20b443de0 100644 --- a/cecli/tui/io.py +++ b/cecli/tui/io.py @@ -328,13 +328,15 @@ def _reroute_output(self, text, msg_type, **kwargs): return False - def start_spinner(self, text, update_last_text=True): + def start_spinner(self, text, update_last_text=True, **kwargs): """Override start_spinner to send spinner state to TUI. 
Args: text: Spinner text update_last_text: Whether to update last_spinner_text + coder_uuid: Optional uuid string to include in the message """ + coder_uuid = kwargs.get("coder_uuid", None) # Call parent to maintain state super().start_spinner(text, update_last_text) @@ -344,23 +346,27 @@ def start_spinner(self, text, update_last_text=True): "type": "spinner", "action": "start", "text": text, + "coder_uuid": coder_uuid, } ) self.output_queue.put( { "type": "spinner", + "coder_uuid": coder_uuid, "action": "update_suffix", "text": "", } ) - def update_spinner(self, text): + def update_spinner(self, text, **kwargs): """Override update_spinner to send updates to TUI. Args: text: New spinner text + coder_uuid: Optional uuid string to include in the message """ + coder_uuid = kwargs.get("coder_uuid", None) # Call parent super().update_spinner(text) @@ -370,15 +376,18 @@ def update_spinner(self, text): "type": "spinner", "action": "update", "text": text, + "coder_uuid": coder_uuid, } ) - def update_spinner_suffix(self, text=None): + def update_spinner_suffix(self, text=None, **kwargs): """Override update_spinner_suffix to send updates to TUI. 
Args: text: New spinner suffix text + coder_uuid: Optional uuid string to include in the message """ + coder_uuid = kwargs.get("coder_uuid", None) # Call parent super().update_spinner_suffix(text) @@ -388,11 +397,13 @@ def update_spinner_suffix(self, text=None): "type": "spinner", "action": "update_suffix", "text": text, + "coder_uuid": coder_uuid, } ) - def stop_spinner(self): + def stop_spinner(self, **kwargs): """Override stop_spinner to send stop state to TUI.""" + coder_uuid = kwargs.get("coder_uuid", None) # Call parent super().stop_spinner() @@ -402,6 +413,8 @@ def stop_spinner(self): "type": "spinner", "action": "stop", } + "coder_uuid": coder_uuid, + "coder_uuid": coder_uuid, ) def interrupt_input(self): diff --git a/cecli/tui/widgets/footer.py b/cecli/tui/widgets/footer.py index b85f4eccd8f..52523963b53 100644 --- a/cecli/tui/widgets/footer.py +++ b/cecli/tui/widgets/footer.py @@ -10,6 +10,7 @@ class MainFooter(Static): # Left side info coder_mode = reactive("code") + agent_name = reactive("") model_name = reactive("") # Right side info @@ -46,6 +47,7 @@ def __init__( self.project_name = project_name self.git_branch = git_branch self.coder_mode = coder_mode + self.agent_name = "" self._spinner_interval = None def on_mount(self): @@ -100,6 +102,8 @@ def render(self) -> Text: left.append(f"{spinner_char} ") if self.spinner_text: left.append(self.spinner_text) + if self.agent_name: + left.append(f"({self.agent_name}) ") # When a sub-agent is generating, show its model alongside the spinner # if self._has_running_sub_agent(): @@ -178,7 +182,9 @@ def update_mode(self, mode: str): def start_spinner(self, text: str = ""): """Show spinner with optional text.""" + def start_spinner(self, text: str = "", agent_name: str = ""): self.spinner_text = text + self.agent_name = agent_name self.spinner_visible = True self.refresh() From a1336cd5ca9375160fca1d7baf5e4c3559370dc1 Mon Sep 17 00:00:00 2001 From: Your Name Date: Thu, 28 May 2026 04:22:33 -0700 Subject: [PATCH 
06/37] feat: Add agent-specific status messages to TUI Co-authored-by: cecli (openai/nvidia_nim/deepseek-ai/deepseek-v4-pro) --- cecli/tui/app.py | 47 +++++++++++++++++++++++++-------- cecli/tui/widgets/status_bar.py | 14 +++++++--- 2 files changed, 46 insertions(+), 15 deletions(-) diff --git a/cecli/tui/app.py b/cecli/tui/app.py index d3cd0eb736b..15c2a12252a 100644 --- a/cecli/tui/app.py +++ b/cecli/tui/app.py @@ -502,9 +502,10 @@ def check_output_queue(self): pass def handle_output_message(self, msg): - """Route output messages to appropriate handlers.""" msg_type = msg["type"] + # Resolve agent_name from coder_uuid for agent-specific status messages + agent_name = self._resolve_agent_name(msg.get("coder_uuid")) if msg_type == "output": container = self._get_output_container(msg) container.add_output(msg["text"], msg.get("task_id")) @@ -532,15 +533,15 @@ def handle_output_message(self, msg): container = self._get_output_container(msg) container.start_task(msg["task_id"], msg["title"], msg.get("task_type")) elif msg_type == "confirmation": - self.show_confirmation(msg) + self.show_confirmation(msg, agent_name=agent_name) elif msg_type == "spinner": - self.update_spinner(msg) + self.update_spinner(msg, agent_name=agent_name) elif msg_type == "ready_for_input": self.enable_input(msg) footer = self.query_one(MainFooter) footer.stop_spinner() elif msg_type == "error": - self.show_error(msg["message"]) + self.show_error(msg["message"], agent_name=agent_name) elif msg_type == "cost_update": footer = self.query_one(MainFooter) footer.update_cost(msg.get("cost", 0)) @@ -563,6 +564,28 @@ def handle_output_message(self, msg): else: self._switch_to_container(target_uuid) + + def _resolve_agent_name(self, coder_uuid: str | None) -> str | None: + """Resolve an agent display name from a coder_uuid. + + Returns the sub-agent's name if the coder_uuid belongs to a known + sub-agent, otherwise None (primary agent uses no prefix). 
+ """ + if not coder_uuid: + return None + try: + from cecli.helpers.agents.service import AgentService + + agent_service = AgentService.get_instance(self.worker.coder) + primary_uuid = str(agent_service.coder.uuid) + if coder_uuid == primary_uuid: + return None # Primary agent gets no prefix + for info in agent_service.sub_agents.values(): + if str(info.coder.uuid) == coder_uuid: + return info.name + except Exception: + pass + return None def add_output(self, text, task_id=None): """Add output to the output container.""" output_container = self.query_one("#output", OutputContainer) @@ -601,7 +624,7 @@ def start_task(self, task_id, title, task_type="general"): output_container = self.query_one("#output", OutputContainer) output_container.start_task(task_id, title, task_type) - def show_confirmation(self, msg): + def show_confirmation(self, msg, agent_name: str | None = None): """Show inline confirmation bar.""" # Disable input while confirm bar is active input_area = self.query_one("#input", InputArea) @@ -623,6 +646,7 @@ def show_confirmation(self, msg): allow_never=allow_never, default=options.get("default", "y"), explicit_yes_required=options.get("explicit_yes_required", False), + agent_name=agent_name, ) def enable_input(self, msg, coder=None): @@ -657,13 +681,13 @@ def enable_input(self, msg, coder=None): input_area.focus() - def update_spinner(self, msg): + def update_spinner(self, msg, agent_name: str | None = None): """Update spinner in footer.""" footer = self.query_one(MainFooter) action = msg.get("action", "start") if action == "start": - footer.start_spinner(msg.get("text", "")) + footer.start_spinner(msg.get("text", ""), agent_name=agent_name) elif action == "update": footer.spinner_text = msg.get("text", "") elif action == "update_suffix": @@ -671,10 +695,11 @@ def update_spinner(self, msg): elif action == "stop": footer.stop_spinner() - def show_error(self, message): - """Show error notification.""" - status_bar = self.query_one("#status-bar", 
StatusBar) - status_bar.show_notification(f"Error: {message}", severity="error", timeout=10) + def show_error(self, message, agent_name: str | None = None): + """Show an error message in the status bar.""" + self.status_bar.show_notification( + message, severity="error", timeout=5, agent_name=agent_name + ) def on_resize(self) -> None: file_list = self.query_one("#file-list", FileList) diff --git a/cecli/tui/widgets/status_bar.py b/cecli/tui/widgets/status_bar.py index 5197d04f064..66925df3176 100644 --- a/cecli/tui/widgets/status_bar.py +++ b/cecli/tui/widgets/status_bar.py @@ -126,6 +126,7 @@ def __init__(self, **kwargs): """Initialize status bar.""" super().__init__(**kwargs) self._text = "" + self._agent_name: str | None = None self._severity = "info" self._show_all = False self._allow_tweak = False @@ -133,7 +134,6 @@ def __init__(self, **kwargs): self._default = "y" self._explicit_yes_required = False self._timer = None - def compose(self) -> ComposeResult: """Create empty container - content added dynamically.""" yield Horizontal(classes="status-content") @@ -153,9 +153,11 @@ def _rebuild_content(self) -> None: container.remove_children() if self.mode == "notification": - container.mount(Static(self._text, classes=f"notification-text {self._severity}")) + display_text = f"({self._agent_name}) {self._text}" if self._agent_name else self._text + container.mount(Static(display_text, classes=f"notification-text {self._severity}")) elif self.mode == "confirm": - container.mount(Static(self._text, classes="confirm-question")) + display_text = f"({self._agent_name}) {self._text}" if self._agent_name else self._text + container.mount(Static(display_text, classes="confirm-question")) hints = Horizontal(classes="confirm-hints") container.mount(hints) hints.mount(Static("\\[y]es", classes="hint hint-yes")) @@ -169,7 +171,8 @@ def _rebuild_content(self) -> None: hints.mount(Static("\\[d]on't ask again", classes="hint hint-never")) def show_notification( - self, text: 
str, severity: str = "info", timeout: float | None = 3.0 + self, text: str, severity: str = "info", timeout: float | None = 3.0, + agent_name: str | None = None, ) -> None: """Show a transient notification message. @@ -184,6 +187,7 @@ def show_notification( self._timer = None self._text = text + self._agent_name = agent_name self._severity = severity self.mode = "notification" self._rebuild_content() @@ -199,6 +203,7 @@ def show_confirm( allow_never: bool = False, default: str = "y", explicit_yes_required: bool = False, + agent_name: str | None = None, ) -> None: """Show a confirmation prompt. @@ -216,6 +221,7 @@ def show_confirm( self._timer = None self._text = question + self._agent_name = agent_name self._show_all = show_all self._allow_tweak = allow_tweak self._allow_never = allow_never From 951988eb03c5fd57969cd64c470aafece9f278e8 Mon Sep 17 00:00:00 2001 From: Jessica Mulein Date: Thu, 28 May 2026 08:26:34 -0700 Subject: [PATCH 07/37] chore: fix session tests and pass CI pre-commit Apply isort/black formatting on session encryption files. Give legacy test_sessions mocks explicit args (session_encrypt off, model fields) so save/load work with SessionManager encryption and Model restore paths. Co-authored-by: Cursor --- cecli/session_crypto.py | 4 +++- cecli/sessions.py | 7 ++----- tests/basic/test_session_crypto.py | 6 +++++- tests/basic/test_sessions.py | 12 ++++++++++++ 4 files changed, 22 insertions(+), 7 deletions(-) diff --git a/cecli/session_crypto.py b/cecli/session_crypto.py index dbbcab8a9a1..7d68d711edb 100644 --- a/cecli/session_crypto.py +++ b/cecli/session_crypto.py @@ -98,7 +98,9 @@ def decrypt_session_bytes(data: bytes, key: bytes) -> dict[str, Any]: try: plaintext = AESGCM(key).decrypt(nonce, ciphertext, None) except Exception as err: - raise SessionCryptoError("Could not decrypt session (wrong key or corrupted file).") from err + raise SessionCryptoError( + "Could not decrypt session (wrong key or corrupted file)." 
+ ) from err try: parsed = json.loads(plaintext.decode("utf-8")) except json.JSONDecodeError as err: diff --git a/cecli/sessions.py b/cecli/sessions.py index d097b48fa4e..69b44c54c29 100644 --- a/cecli/sessions.py +++ b/cecli/sessions.py @@ -5,8 +5,7 @@ from pathlib import Path from typing import Dict, List, Optional -from cecli import models -from cecli import session_crypto +from cecli import models, session_crypto from cecli.helpers.conversation import ConversationService, MessageTag @@ -70,9 +69,7 @@ def _write_session_file(self, session_file: Path, session_data: dict) -> bool: f"({session_crypto.KEY_ENV} or --session-key-file)." ) return False - session_file.write_bytes( - session_crypto.encrypt_session_dict(session_data, key) - ) + session_file.write_bytes(session_crypto.encrypt_session_dict(session_data, key)) else: with open(session_file, "w", encoding="utf-8") as f: json.dump(session_data, f, indent=2) diff --git a/tests/basic/test_session_crypto.py b/tests/basic/test_session_crypto.py index 949e617adfb..965d6c14a4a 100644 --- a/tests/basic/test_session_crypto.py +++ b/tests/basic/test_session_crypto.py @@ -72,7 +72,11 @@ def test_empty_encrypted_body_raises(session_key32): def test_encrypted_file_roundtrip_on_disk(tmp_path, session_key32): path = tmp_path / "sess.json" - payload = {"version": 1, "session_name": "disk", "chat_history": {"done_messages": [], "cur_messages": []}} + payload = { + "version": 1, + "session_name": "disk", + "chat_history": {"done_messages": [], "cur_messages": []}, + } path.write_bytes(session_crypto.encrypt_session_dict(payload, session_key32)) raw = path.read_bytes() assert session_crypto.is_encrypted_payload(raw) diff --git a/tests/basic/test_sessions.py b/tests/basic/test_sessions.py index c6611e12909..9262df2d147 100644 --- a/tests/basic/test_sessions.py +++ b/tests/basic/test_sessions.py @@ -1,5 +1,6 @@ import json import os +from types import SimpleNamespace from unittest.mock import AsyncMock, MagicMock import pytest @@ 
-47,6 +48,17 @@ def mock_coder(): coder.mcp_manager = None coder.skills_manager = None coder.io.read_text.return_value = "some todo content" + coder.format_chat_chunks = MagicMock() + coder.args = SimpleNamespace( + model="test_model", + weak_model="test_weak_model", + editor_model="test_editor_model", + agent_model="test_agent_model", + editor_edit_format="editor-diff", + verbose=False, + session_encrypt=False, + session_key_file=None, + ) return coder From c73c7a40aca7550e8022a976e90012d158c87702 Mon Sep 17 00:00:00 2001 From: Jessica Mulein Date: Thu, 28 May 2026 14:38:34 -0700 Subject: [PATCH 08/37] fix(coder): Ollama-friendly empty LLM tool warning Replace legacy "provider account" copy when the model returns no tokens or tool calls; keep cloud-oriented hint for non-Ollama models. Co-authored-by: Cursor --- cecli/coders/base_coder.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/cecli/coders/base_coder.py b/cecli/coders/base_coder.py index 2b2fbdb40be..ff7a92829b6 100755 --- a/cecli/coders/base_coder.py +++ b/cecli/coders/base_coder.py @@ -2306,6 +2306,16 @@ async def check_tokens(self, messages): def get_active_model(self): return self.main_model + def empty_llm_tool_warning(self) -> str: + """Ollama-friendly copy for local models; cloud hint otherwise.""" + name = str(getattr(getattr(self, "main_model", None), "name", "") or "") + if "ollama" in name.lower(): + return ( + "Empty response from the local model (Ollama). " + "The model may have timed out, unloaded, or hit context limits." + ) + return "Empty response received from LLM. Check API keys, quota, or provider status." + async def send_message(self, inp): # Notify IO that LLM processing is starting self.io.llm_started() @@ -3363,7 +3373,7 @@ async def show_send_output(self, completion): and not len(self.partial_response_tool_calls) and not len(self.partial_response_reasoning_content) ): - self.io.tool_warning("Empty response received from LLM. 
Check your provider account?") + self.io.tool_warning(self.empty_llm_tool_warning()) self.io.assistant_output(show_resp, pretty=self.show_pretty()) @@ -3520,7 +3530,7 @@ async def show_send_output_stream(self, completion): return if not received_content and len(self.partial_response_tool_calls) == 0: - self.io.tool_warning("Empty response received from LLM. Check your provider account?") + self.io.tool_warning(self.empty_llm_tool_warning()) def consolidate_chunks(self): if self.partial_response_consolidated: From 3af6b45644fa331213b2ee01a8b3256d0ea2e0a6 Mon Sep 17 00:00:00 2001 From: Your Name Date: Thu, 28 May 2026 23:12:52 -0400 Subject: [PATCH 09/37] Allow reaping sub agents by name/identifier --- cecli/commands/reap_agent.py | 155 ++++++++++++++++++++++++++++------- 1 file changed, 124 insertions(+), 31 deletions(-) diff --git a/cecli/commands/reap_agent.py b/cecli/commands/reap_agent.py index ce0beb6a24c..bbdbdf81874 100644 --- a/cecli/commands/reap_agent.py +++ b/cecli/commands/reap_agent.py @@ -1,6 +1,7 @@ """Reap-agent command - force destroys the active sub-agent.""" import weakref +from typing import List from cecli.helpers.agents.service import AgentService @@ -14,47 +15,100 @@ class ReapAgentCommand(BaseCommand): @classmethod async def execute(cls, io, coder, args, **kwargs): - """Destroy the active sub-agent and clean up its resources.""" - active_uuid = None - - # Use _get_tui logic (same as AgentService._get_tui) to safely - # dereference the TUI weakref. The TUI stores itself on coders - # as a weakref.ref, so we must call it to get the live object. - tui_ref = getattr(coder, "tui", None) - if tui_ref is not None: - if isinstance(tui_ref, weakref.ref): - tui_instance = tui_ref() - else: - tui_instance = tui_ref - if tui_instance is not None: - active_uuid = tui_instance._get_visible_coder().uuid + """Destroy a sub-agent and clean up its resources. 
- if not active_uuid: - io.tool_error("No active sub-agent to reap.") - return + If an agent identifier is provided, looks up the sub-agent by + name or UUID prefix (matching switch-agent semantics). Without + an argument the currently-active sub-agent (from the TUI) is + reaped. + """ + agent_identifier = args.strip() if args else "" - # Find the sub-agent info by UUID - agent_service = AgentService.get_instance(coder) + # --- Resolve the target UUID -------------------------------- # + agent_uuid = None target_name = None - target_info = None - for name, info in list(agent_service.sub_agents.items()): - if info.coder.uuid == active_uuid: - target_name = name - target_info = info - break - if target_name is None: - io.tool_error("Could not find sub-agent for the active container.") - return + if agent_identifier: + # Lookup logic mirroring switch-agent + agent_service = AgentService.get_instance(coder) + + # Try parsing "name (uuid_prefix)" format + if agent_identifier.endswith(")") and " (" in agent_identifier: + try: + uuid_prefix = agent_identifier.rsplit(" (", 1)[1][:-1] + for uuid, info in agent_service.sub_agents.items(): + if uuid.startswith(uuid_prefix): + agent_uuid = uuid + target_name = info.name + break + except IndexError: + pass + + # Try matching by name directly + if agent_uuid is None: + for uuid, info in agent_service.sub_agents.items(): + if info.name == agent_identifier: + agent_uuid = uuid + target_name = info.name + break + + # Try matching by UUID prefix directly + if agent_uuid is None: + for uuid, info in agent_service.sub_agents.items(): + if uuid.startswith(agent_identifier): + agent_uuid = uuid + target_name = info.name + break + + if agent_uuid is None: + io.tool_error(f"Error: Agent '{agent_identifier}' not found.") + return + + # Prevent reaping the primary coder + if agent_uuid == str(coder.uuid): + io.tool_error("Cannot reap the primary coder.") + return + + else: + # Original behaviour: reap the active sub-agent from the TUI + 
active_uuid = None + + # Use _get_tui logic (same as AgentService._get_tui) to safely + # dereference the TUI weakref. The TUI stores itself on coders + # as a weakref.ref, so we must call it to get the live object. + tui_ref = getattr(coder, "tui", None) + if tui_ref is not None: + if isinstance(tui_ref, weakref.ref): + tui_instance = tui_ref() + else: + tui_instance = tui_ref + if tui_instance is not None: + active_uuid = tui_instance._get_visible_coder().uuid + + if not active_uuid: + io.tool_error("No active sub-agent to reap.") + return + # Find the sub-agent info by UUID + agent_service = AgentService.get_instance(coder) + for name, info in list(agent_service.sub_agents.items()): + if info.coder.uuid == active_uuid: + agent_uuid = active_uuid + target_name = name + break + else: + io.tool_error("Could not find sub-agent for the active container.") + return + + # --- Cleanup ------------------------------------------------ # try: # Cleanup conversation resources from cecli.helpers.conversation.service import ConversationService - ConversationService.destroy_instances(target_info.coder.uuid) + ConversationService.destroy_instances(agent_uuid) # Remove from tracking and clean up - agent_service._cleanup_sub_agent(target_info.coder.uuid) + agent_service._cleanup_sub_agent(agent_uuid) io.tool_output(f"Sub-agent '{target_name}' reaped.") except (KeyError, AttributeError, RuntimeError) as e: @@ -62,6 +116,45 @@ async def execute(cls, io, coder, args, **kwargs): except Exception as e: io.tool_error(f"Unexpected error reaping sub-agent: {e}") + @classmethod + def get_completions(cls, io, coder, args) -> List[str]: + """Get completion options for reap-agent command.""" + try: + agent_service = AgentService.get_instance(coder) + names: List[str] = [] + + # Add sub-agent names + if agent_service and agent_service.sub_agents: + # First pass: count name occurrences + name_counts = {} + for uuid, sub_agent_info in agent_service.sub_agents.items(): + 
name_counts[sub_agent_info.name] = name_counts.get(sub_agent_info.name, 0) + 1 + + # Second pass: only show UUID prefix when name appears multiple times + for uuid, sub_agent_info in agent_service.sub_agents.items(): + name = sub_agent_info.name + if name_counts[name] > 1: + names.append(f"{name} ({uuid[:3]})") + else: + names.append(name) + + current_arg = args.strip().lower() + if current_arg: + return [name for name in names if name.lower().startswith(current_arg)] + else: + return names + except Exception: + return [] + @classmethod def get_help(cls) -> str: - return "Force destroy the active sub-agent (/reap-agent)" + help_text = "Force destroy the active sub-agent (/reap-agent)" + help_text += "\n\nUsage:\n" + help_text += " /reap-agent # Reap the currently active sub-agent\n" + help_text += " /reap-agent # Reap a sub-agent by name\n" + help_text += " /reap-agent # Reap a sub-agent by UUID prefix\n" + help_text += " /reap-agent () # Reap by name with UUID disambiguation\n" + help_text += "\nExamples:\n" + help_text += " /reap-agent reviewer\n" + help_text += " /reap-agent abc\n" + return help_text From d39efb5e4492f9e26467e4bbb5d29e4963c9492a Mon Sep 17 00:00:00 2001 From: Your Name Date: Fri, 29 May 2026 02:33:10 -0400 Subject: [PATCH 10/37] Sub agent changes: - Unify /invoke-agent and /spawn-agent into a single non-blocking /spawn-agent command - Pass parent uuids for nested delegation appropriately --- cecli/commands/__init__.py | 3 - cecli/commands/invoke_agent.py | 54 ------------ cecli/commands/spawn_agent.py | 23 +++-- cecli/helpers/agents/service.py | 116 +++++++++++++++++++------ cecli/tools/delegate.py | 2 +- cecli/website/docs/config/subagents.md | 18 ++-- 6 files changed, 115 insertions(+), 101 deletions(-) delete mode 100644 cecli/commands/invoke_agent.py diff --git a/cecli/commands/__init__.py b/cecli/commands/__init__.py index 549cf12e19d..05e352a66ea 100644 --- a/cecli/commands/__init__.py +++ b/cecli/commands/__init__.py @@ -33,7 +33,6 @@ 
from .history_search import HistorySearchCommand from .hooks import HooksCommand from .include_skill import IncludeSkillCommand -from .invoke_agent import InvokeAgentCommand from .lint import LintCommand from .list_sessions import ListSessionsCommand from .list_skills import ListSkillsCommand @@ -117,7 +116,6 @@ CommandRegistry.register(HelpCommand) CommandRegistry.register(HistorySearchCommand) CommandRegistry.register(HooksCommand) -CommandRegistry.register(InvokeAgentCommand) CommandRegistry.register(ReapAgentCommand) CommandRegistry.register(SpawnAgentCommand) CommandRegistry.register(SwitchAgentCommand) @@ -200,7 +198,6 @@ "HistorySearchCommand", "HooksCommand", "IncludeSkillCommand", - "InvokeAgentCommand", "ReapAgentCommand", "SpawnAgentCommand", "SwitchAgentCommand", diff --git a/cecli/commands/invoke_agent.py b/cecli/commands/invoke_agent.py deleted file mode 100644 index 304e41df4e4..00000000000 --- a/cecli/commands/invoke_agent.py +++ /dev/null @@ -1,54 +0,0 @@ -"""Invoke-agent command - invokes a sub-agent with a prompt.""" - -from .utils.base_command import BaseCommand - - -class InvokeAgentCommand(BaseCommand): - NORM_NAME = "invoke-agent" - DESCRIPTION = "Invoke a sub-agent with a prompt (blocking)" - show_completion_notification = False - - @classmethod - async def execute(cls, io, coder, args, **kwargs): - """Invoke a sub-agent by name with a prompt.""" - from cecli.helpers.agents.service import AgentService - - parts = args.strip().split(maxsplit=1) - if not parts: - io.tool_error("Usage: /invoke-agent ") - return - - name = parts[0] - prompt = parts[1] if len(parts) > 1 else "" - - try: - agent_service = AgentService.get_instance(coder) - summary = await agent_service.invoke(name, prompt, blocking=True) - if summary: - from cecli.helpers.conversation.service import ConversationService - from cecli.helpers.conversation.tags import MessageTag - - ConversationService.get_manager(coder).add_message( - message_dict=dict(role="user", content=summary), 
- tag=MessageTag.CUR, - ) - io.tool_output(f"Sub-agent '{name}' completed:\n{summary}") - else: - io.tool_output(f"Sub-agent '{name}' completed (no summary).") - except ValueError as e: - io.tool_error(f"Error: {e}") - except RuntimeError as e: - io.tool_error(f"Error: {e}") - except Exception as e: - io.tool_error(f"Error invoking sub-agent '{name}': {e}") - - @classmethod - def get_help(cls) -> str: - return "Invoke a sub-agent with a prompt (/invoke-agent )" - - @classmethod - def get_completions(cls, io, coder, args) -> list[str]: - """Return registered sub-agent names for tab-completion.""" - from cecli.helpers.agents.service import AgentService - - return list(AgentService.get_registry().keys()) diff --git a/cecli/commands/spawn_agent.py b/cecli/commands/spawn_agent.py index afde0c2e799..33ffe3c7953 100644 --- a/cecli/commands/spawn_agent.py +++ b/cecli/commands/spawn_agent.py @@ -5,21 +5,30 @@ class SpawnAgentCommand(BaseCommand): NORM_NAME = "spawn-agent" - DESCRIPTION = "Spawn a sub-agent without a prompt (waits for user input)" + DESCRIPTION = "Spawn a sub-agent, optionally with a prompt" @classmethod async def execute(cls, io, coder, args, **kwargs): - """Spawn a sub-agent by name (non-blocking).""" + """Spawn a sub-agent by name, optionally with a prompt. 
+ + Syntax: + /spawn-agent — Spawn without prompt (waits for user input) + /spawn-agent — Spawn and start processing the prompt immediately + """ from cecli.helpers.agents.service import AgentService - name = args.strip() - if not name: - io.tool_error("Usage: /spawn-agent ") + parts = args.strip().split(maxsplit=1) + if not parts: + io.tool_error("Usage: /spawn-agent []") return + name = parts[0] + prompt = parts[1] if len(parts) > 1 else None + try: agent_service = AgentService.get_instance(coder) - await agent_service.spawn(name) + await agent_service.spawn(name, prompt, parent=coder) + if coder.tui and coder.tui(): switch_key = coder.tui().get_keys_for("next_agent") io.tool_output(f"Sub-agent '{name}' spawned. " f"Switch to it with {switch_key}") @@ -32,7 +41,7 @@ async def execute(cls, io, coder, args, **kwargs): @classmethod def get_help(cls) -> str: - return "Spawn a sub-agent that waits for user input (/spawn-agent )" + return "Spawn a sub-agent, optionally with a prompt (/spawn-agent [])" @classmethod def get_completions(cls, io, coder, args) -> list[str]: diff --git a/cecli/helpers/agents/service.py b/cecli/helpers/agents/service.py index 72322b378eb..fc131030daa 100644 --- a/cecli/helpers/agents/service.py +++ b/cecli/helpers/agents/service.py @@ -307,7 +307,9 @@ def _check_max_sub_agents(self) -> None: "Wait for one to finish or use /reap-agent to free resources." ) - async def _create_sub_agent_coder(self, name: str) -> Tuple[Any, SubAgentInfo]: + async def _create_sub_agent_coder( + self, name: str, parent: Any = None + ) -> Tuple[Any, SubAgentInfo]: """Create a sub-agent coder, register it, and set up its container and prompt. Shared helper used by both ``invoke()`` and ``spawn()`` to eliminate @@ -315,6 +317,10 @@ async def _create_sub_agent_coder(self, name: str) -> Tuple[Any, SubAgentInfo]: Args: name: Name of the sub-agent to create. + parent: Optional coder instance to use as the parent. 
+ If provided, the new sub-agent's ``parent_uuid`` will be + ``parent.uuid`` instead of ``self.coder.uuid``, enabling + nested sub-agent hierarchies. Defaults to ``self.coder``. Returns: Tuple of ``(new_coder, info)``. @@ -333,7 +339,7 @@ async def _create_sub_agent_coder(self, name: str) -> Tuple[Any, SubAgentInfo]: from cecli.coders import Coder - parent_coder = self.coder + parent_coder = parent if parent is not None else self.coder new_uuid = str(uuid4()) kwargs = dict( @@ -371,7 +377,7 @@ async def _create_sub_agent_coder(self, name: str) -> Tuple[Any, SubAgentInfo]: # Notify TUI to create a container try: - tui = self._get_tui(parent_coder) + tui = self._get_tui(self.coder) if tui is not None: tui.call_from_thread(tui.create_sub_agent_container, new_uuid, name) except Exception: @@ -464,9 +470,19 @@ async def _run_generate(): info.generate_task = task return task - async def invoke(self, name: str, prompt: str, blocking: bool = True) -> Optional[str]: - """Invoke a sub-agent by name with the given prompt (blocking by default).""" - new_coder, info = await self._create_sub_agent_coder(name) + async def invoke( + self, name: str, prompt: str, blocking: bool = True, parent: Any = None + ) -> Optional[str]: + """Invoke a sub-agent by name with the given prompt (blocking by default). + + Args: + name: Name of the sub-agent to invoke. + prompt: The user message to pass to the sub-agent. + blocking: If True, waits for completion and returns summary. + parent: Optional coder instance to use as the parent for nested + sub-agent hierarchies. Defaults to ``self.coder``. 
+ """ + new_coder, info = await self._create_sub_agent_coder(name, parent) if not blocking: return None @@ -476,32 +492,58 @@ async def invoke(self, name: str, prompt: str, blocking: bool = True) -> Optiona await task return info.summary - async def spawn(self, name: str) -> None: - """Spawn a sub-agent (non-blocking) that waits for user input.""" - await self._create_sub_agent_coder(name) + async def spawn( + self, name: str, prompt: Optional[str] = None, parent: Any = None + ) -> Tuple[Any, SubAgentInfo]: + """Spawn a sub-agent (non-blocking) that waits for user input. + + Args: + name: Name of the sub-agent to spawn. + prompt: Optional prompt. If provided, starts the generate task + immediately with this prompt (fire-and-forget). + parent: Optional coder instance to use as the parent for nested + sub-agent hierarchies. Defaults to ``self.coder``. - async def wait(self, name: str) -> Optional[str]: - """Wait for a sub-agent to finish and return its summary.""" - # Find by name (allows multiple instances of the same agent type) - info = None - for candidate in self.sub_agents.values(): - if candidate.name == name: - info = candidate - break - if not info: - raise ValueError(f"No sub-agent named '{name}' running.") + Returns: + Tuple of ``(new_coder, info)`` so callers can further interact + with the sub-agent (e.g. call ``start_generate_task`` later). + """ + new_coder, info = await self._create_sub_agent_coder(name, parent) + if prompt: + self.start_generate_task(info, prompt) + return new_coder, info - if info.status == SubAgentStatus.FINISHED: - return info.summary + async def wait(self, parent: Any) -> List[str]: + """Await all active sub-agents whose ``parent_uuid`` matches ``parent.uuid``. - # Poll until finished - while info.status not in (SubAgentStatus.FINISHED, SubAgentStatus.ERROR): - await asyncio.sleep(0.5) + Waits for every child's generate task to finish (via ``asyncio.gather``) + and returns their summaries as a list. 
- if info.status == SubAgentStatus.ERROR: - raise RuntimeError(f"Sub-agent '{name}' failed: {info.error}") + Args: + parent: A coder instance (with ``.uuid``) or a UUID string whose + children should be awaited. - return info.summary + Returns: + ``List[str]`` — one summary per child sub-agent. May be empty + if the parent has no active children. + """ + uid = str(parent.uuid) if hasattr(parent, "uuid") else str(parent) + children = [info for info in self.sub_agents.values() if info.parent_uuid == uid] + if not children: + logger.debug("wait(%s): no children found", uid) + return [] + + # Collect all active generate tasks + tasks = [] + for info in children: + if info.generate_task is not None and not info.generate_task.done(): + tasks.append(info.generate_task) + + if tasks: + logger.debug("wait(%s): awaiting %d generate task(s)", uid, len(tasks)) + await asyncio.gather(*tasks) + + return [info.summary for info in children] def get_active_agents(self) -> List[Dict[str, Any]]: """Return list of active sub-agents for display.""" @@ -515,6 +557,26 @@ def get_active_agents(self) -> List[Dict[str, Any]]: for info in self.sub_agents.values() ] + def get_children(self, coder_or_uuid: Any) -> List[SubAgentInfo]: + """Return sub-agents whose parent is the given coder or UUID. + + Accepts either a coder instance (object with a ``uuid`` attribute) + or a plain UUID string. Returns all ``SubAgentInfo`` entries whose + ``parent_uuid`` matches the resolved identifier. + + Args: + coder_or_uuid: A coder instance (with ``.uuid``) or a UUID string. + + Returns: + List of ``SubAgentInfo`` objects whose parent is the given coder. 
+ """ + if hasattr(coder_or_uuid, "uuid"): + uid = str(coder_or_uuid.uuid) + else: + uid = str(coder_or_uuid) + + return [info for info in self.sub_agents.values() if info.parent_uuid == uid] + # ------------------------------------------------------------------ # # Foreground agent tracking # ------------------------------------------------------------------ # diff --git a/cecli/tools/delegate.py b/cecli/tools/delegate.py index 29c4daff90b..e01cebc2e17 100644 --- a/cecli/tools/delegate.py +++ b/cecli/tools/delegate.py @@ -72,7 +72,7 @@ async def _run_one(name: str, prompt: str) -> tuple[bool, str]: """Run a single sub-agent and return a (success, formatted_message) tuple.""" try: agent_service._check_max_sub_agents() - summary = await agent_service.invoke(name, prompt, blocking=True) + summary = await agent_service.invoke(name, prompt, parent=coder, blocking=True) if summary: return True, f"Sub-agent '{name}' completed:\n{summary}" return True, f"Sub-agent '{name}' completed (no summary)." diff --git a/cecli/website/docs/config/subagents.md b/cecli/website/docs/config/subagents.md index 5f876fc3b8a..1d20b3ae6a8 100644 --- a/cecli/website/docs/config/subagents.md +++ b/cecli/website/docs/config/subagents.md @@ -67,21 +67,21 @@ agent-config: | Command | Description | |---------|-------------| -| `/invoke-agent ` | Invoke a sub-agent with a prompt (blocking — waits for completion) | | `/spawn-agent ` | Spawn a sub-agent without a prompt (non-blocking — waits for user input) | +| `/spawn-agent ` | Spawn a sub-agent with a prompt (non-blocking — starts processing immediately) | | `/reap-agent` | Force destroy the currently active sub-agent | -> **Tip**: Both `/invoke-agent` and `/spawn-agent` support tab completion of sub-agent names. +> **Tip**: `/spawn-agent` supports tab completion of sub-agent names. -### Invoking a Sub-Agent (Blocking) +### Spawning a Sub-Agent with a Prompt -The most common way to use sub-agents. 
The primary agent waits for the sub-agent to finish: +Spawns a sub-agent and immediately sends it a prompt to start processing (non-blocking): ``` -/invoke-agent reviewer Can you review the changes in editblock_func_coder.py? +/spawn-agent reviewer Can you review the changes in editblock_func_coder.py? ``` -This sends the prompt to the reviewer sub-agent, which works autonomously and returns a summary when done. +This spawns the reviewer sub-agent and sends it the prompt. The sub-agent begins working autonomously while you can continue interacting with the primary agent. ### Delegating from the Primary Agent @@ -93,7 +93,7 @@ The primary agent can also delegate work using the `Delegate` tool. This enables 4. Sub-agents work independently and return their summaries 5. The primary agent synthesizes the results -### Spawning a Sub-Agent (Non-Blocking) +### Spawning a Sub-Agent Without a Prompt Creates a sub-agent that waits for you to interact with it directly: @@ -174,7 +174,7 @@ and suggestions for improvement. ``` ``` -/invoke-agent reviewer Please review the last 5 commits in this branch +/spawn-agent reviewer Please review the last 5 commits in this branch ``` ### Example 2: Test Writing Workflow @@ -192,7 +192,7 @@ happy paths. Use the project's existing testing patterns and conventions. 
``` ``` -/invoke-agent tester Write unit tests for the new AgentService.invoke() method +/spawn-agent tester Write unit tests for the new AgentService.invoke() method ``` ### Example 3: Multi-Agent Review From d88848ef40f972b9521ac1369b2e65028563fd19 Mon Sep 17 00:00:00 2001 From: Your Name Date: Fri, 29 May 2026 04:04:29 -0700 Subject: [PATCH 11/37] fix: Update cecli utils and fix TUI tests Co-authored-by: cecli (openai/gemini_cli_local/gemini-2.5-pro) --- cecli/utils.py | 4 +- tests/tui/test_app.py | 199 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 200 insertions(+), 3 deletions(-) diff --git a/cecli/utils.py b/cecli/utils.py index aac9b20b597..ebb928ad91b 100644 --- a/cecli/utils.py +++ b/cecli/utils.py @@ -8,7 +8,7 @@ import tempfile from pathlib import Path -import oslex +import shlex from cecli.dump import dump # noqa: F401 from cecli.waiting import Spinner @@ -437,7 +437,7 @@ def printable_shell_command(cmd_list): Returns: str: Shell-escaped command string. """ - return oslex.join(cmd_list) + return shlex.join(cmd_list) def split_concatenated_json(s: str) -> list[str]: diff --git a/tests/tui/test_app.py b/tests/tui/test_app.py index e6244d87cf2..22d1eed160a 100644 --- a/tests/tui/test_app.py +++ b/tests/tui/test_app.py @@ -31,7 +31,204 @@ def test_on_mouse_move_linux(tui_instance): """ Test that on_mouse_move does not stop the event on Linux. """ - with patch("platform.system", return_value="Linux"): + with patch("cecli.tui.app.IS_WINDOWS", False): mock_event = MagicMock(spec=events.MouseMove) tui_instance.on_mouse_move(mock_event) mock_event.stop.assert_not_called() + + + + +def test_handle_output_message_spinner_with_agent_name(tui_instance, monkeypatch): + """ + Test that spinner status messages display the agent name prefix + when a sub-agent is active. 
+ """ + # Mock query_one to return mock widgets for all lookup types + mock_footer = MagicMock() + mock_footer.spinner_suffix = "" + mock_status_bar = MagicMock() + mock_input_area = MagicMock() + mock_input_container = MagicMock() + mock_output_container = MagicMock() + + def mock_query_one(selector, *args): + # query_one may be called with class or string selector + if isinstance(selector, type): + name = selector.__name__ + else: + # String selector - could be CSS like "#input, InputArea" + if "," in selector or "#" in selector: + return mock_input_area + name = "MainFooter" # Default fallback for footer lookup + + mapping = { + "MainFooter": mock_footer, + "StatusBar": mock_status_bar, + "InputContainer": mock_input_container, + "InputArea": mock_input_area, + "OutputContainer": mock_output_container, + } + return mapping.get(name, mock_footer) + + tui_instance.query_one = mock_query_one + + # Mock coder worker for agent service lookups + mock_coder = MagicMock() + mock_coder.uuid = "primary_uuid" + tui_instance.worker = MagicMock() + tui_instance.worker.coder = mock_coder + + # Mock AgentService so _resolve_agent_name works + mock_agent_service = MagicMock() + mock_agent_info = MagicMock() + mock_agent_info.name = "researcher" + mock_agent_info.coder = MagicMock() + mock_agent_info.coder.uuid = "some_uuid" + mock_agent_service.sub_agents = {"some_uuid": mock_agent_info} + mock_agent_service.coder = mock_coder + + monkeypatch.setattr( + "cecli.helpers.agents.service.AgentService.get_instance", + lambda *args: mock_agent_service, + ) + + # Test: sub-agent spinner should include agent_name="researcher" + msg = { + "type": "spinner", "action": "start", "text": "Thinking...", + "coder_uuid": "some_uuid", + } + tui_instance.handle_output_message(msg) + mock_footer.start_spinner.assert_called_once_with( + "Thinking...", agent_name="researcher" + ) + + # Test: primary agent spinner should have agent_name=None + mock_footer.reset_mock() + msg["coder_uuid"] = 
"primary_uuid" + tui_instance.handle_output_message(msg) + mock_footer.start_spinner.assert_called_once_with( + "Thinking...", agent_name=None + ) + +def test_handle_output_message_confirmation_with_agent_name(tui_instance, monkeypatch): + """ + Test that confirmation status messages display the agent name prefix. + """ + mock_footer = MagicMock() + mock_footer.spinner_suffix = "" + mock_status_bar = MagicMock() + mock_input_area = MagicMock() + mock_input_container = MagicMock() + mock_output_container = MagicMock() + + def mock_query_one(selector, *args): + if isinstance(selector, type): + name = selector.__name__ + else: + if selector == "#input" or selector == "#input, InputArea": + return mock_input_area + elif selector == "#status-bar" or selector == "#status-bar, StatusBar": + return mock_status_bar + name = "MainFooter" # Default fallback + + mapping = { + "MainFooter": mock_footer, + "StatusBar": mock_status_bar, + "InputContainer": mock_input_container, + "InputArea": mock_input_area, + "OutputContainer": mock_output_container, + } + return mapping.get(name, mock_footer) + + tui_instance.query_one = mock_query_one + + # Mock coder worker for agent service lookups + mock_coder = MagicMock() + mock_coder.uuid = "primary_uuid" + tui_instance.worker = MagicMock() + tui_instance.worker.coder = mock_coder + + # Stub status_bar reference + tui_instance.status_bar = mock_status_bar + + # Mock AgentService + mock_agent_service = MagicMock() + mock_agent_info = MagicMock() + mock_agent_info.name = "researcher" + mock_agent_info.coder = MagicMock() + mock_agent_info.coder.uuid = "some_uuid" + mock_agent_service.sub_agents = {"some_uuid": mock_agent_info} + mock_agent_service.coder = mock_coder + + monkeypatch.setattr( + "cecli.helpers.agents.service.AgentService.get_instance", + lambda *args: mock_agent_service, + ) + + # Test: sub-agent confirmation should include agent_name="researcher" + msg = { + "type": "confirmation", "question": "Are you sure?", + "options": 
{}, "coder_uuid": "some_uuid", + } + tui_instance.handle_output_message(msg) + mock_status_bar.show_confirm.assert_called_once_with( + "Are you sure?", show_all=False, allow_tweak=False, + allow_never=False, default="y", + explicit_yes_required=False, agent_name="researcher", + ) + +def test_handle_output_message_error_with_agent_name(tui_instance, monkeypatch): + """ + Test that error status messages display the agent name prefix. + """ + mock_footer = MagicMock() + mock_footer.spinner_suffix = "" + mock_status_bar = MagicMock() + mock_input_area = MagicMock() + mock_input_container = MagicMock() + mock_output_container = MagicMock() + + def mock_query_one(selector, *args): + if isinstance(selector, type): + name = selector.__name__ + else: + if "," in selector or "#" in selector: + return mock_input_area + return mock_footer + mapping = { + "MainFooter": mock_footer, + "StatusBar": mock_status_bar, + "InputContainer": mock_input_container, + "InputArea": mock_input_area, + "OutputContainer": mock_output_container, + } + return mapping.get(name, mock_footer) + + tui_instance.query_one = mock_query_one + + # Mock coder worker for agent service lookups + mock_coder = MagicMock() + mock_coder.uuid = "primary_uuid" + tui_instance.worker = MagicMock() + tui_instance.worker.coder = mock_coder + + # Stub status_bar reference + tui_instance.status_bar = mock_status_bar + + # Mock AgentService - unknown UUID should return None (no prefix) + monkeypatch.setattr( + "cecli.helpers.agents.service.AgentService.get_instance", + lambda *args: MagicMock(sub_agents={}, coder=mock_coder), + ) + + # Test: error message for unknown agent should have agent_name=None + msg = { + "type": "error", "message": "Something went wrong!", + "coder_uuid": "unknown_uuid", + } + tui_instance.handle_output_message(msg) + mock_status_bar.show_notification.assert_called_once_with( + "Something went wrong!", severity="error", timeout=5, + agent_name=None, + ) From 
8aa0c9298f71b541dc85e8692e26964d65d22436 Mon Sep 17 00:00:00 2001 From: Your Name Date: Fri, 29 May 2026 12:41:15 -0700 Subject: [PATCH 12/37] feat: Add agent name prefixes to TUI status messages Co-authored-by: cecli (openai/nvidia_nim/deepseek-ai/deepseek-v4-pro) --- cecli/tui/app.py | 3 ++- cecli/tui/widgets/footer.py | 10 ++++------ cecli/tui/widgets/status_bar.py | 2 ++ 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/cecli/tui/app.py b/cecli/tui/app.py index 15c2a12252a..730b35d9a74 100644 --- a/cecli/tui/app.py +++ b/cecli/tui/app.py @@ -583,7 +583,8 @@ def _resolve_agent_name(self, coder_uuid: str | None) -> str | None: for info in agent_service.sub_agents.values(): if str(info.coder.uuid) == coder_uuid: return info.name - except Exception: + except (AttributeError, ImportError, KeyError): + # Agent service not available or coder not yet initialized pass return None def add_output(self, text, task_id=None): diff --git a/cecli/tui/widgets/footer.py b/cecli/tui/widgets/footer.py index 52523963b53..c80739c5ca0 100644 --- a/cecli/tui/widgets/footer.py +++ b/cecli/tui/widgets/footer.py @@ -100,11 +100,10 @@ def render(self) -> Text: if self.spinner_visible: spinner_char = self._spinner_chars[self._spinner_frame] left.append(f"{spinner_char} ") - if self.spinner_text: - left.append(self.spinner_text) if self.agent_name: left.append(f"({self.agent_name}) ") - + if self.spinner_text: + left.append(self.spinner_text) # When a sub-agent is generating, show its model alongside the spinner # if self._has_running_sub_agent(): # model_display = self._get_display_model() @@ -180,9 +179,8 @@ def update_mode(self, mode: str): self.coder_mode = mode self.refresh() - def start_spinner(self, text: str = ""): - """Show spinner with optional text.""" def start_spinner(self, text: str = "", agent_name: str = ""): + """Show spinner with optional text.""" self.spinner_text = text self.agent_name = agent_name self.spinner_visible = True @@ -210,8 +208,8 @@ def 
stop_spinner(self): self.spinner_visible = False self.spinner_text = "" + self.agent_name = "" self.refresh() - def _has_running_sub_agent(self) -> bool: """Check if any agent is currently generating output.""" try: diff --git a/cecli/tui/widgets/status_bar.py b/cecli/tui/widgets/status_bar.py index 66925df3176..708a3467965 100644 --- a/cecli/tui/widgets/status_bar.py +++ b/cecli/tui/widgets/status_bar.py @@ -180,6 +180,7 @@ def show_notification( text: Message to display severity: One of "info", "warning", "error", "success" timeout: Auto-dismiss after this many seconds (None = no auto-dismiss) + agent_name: Optional agent name to prefix the message with """ # Cancel any existing timer if self._timer: @@ -214,6 +215,7 @@ def show_confirm( allow_never: Whether to show "don't ask again" option default: Default response ("y" or "n") explicit_yes_required: Whether explicit yes is required + agent_name: Optional agent name to prefix the question with """ # Cancel any existing timer if self._timer: From 2baaf4e22628663acdf20468ad5d4adb7e6aef67 Mon Sep 17 00:00:00 2001 From: Your Name Date: Fri, 29 May 2026 13:59:31 -0700 Subject: [PATCH 13/37] fix: Pass coder_uuid to spinner start calls Co-authored-by: cecli (openai/agentic) --- cecli/coders/base_coder.py | 8 +++----- cecli/tui/app.py | 13 ++++++++----- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/cecli/coders/base_coder.py b/cecli/coders/base_coder.py index 2b2fbdb40be..6b189419cc6 100755 --- a/cecli/coders/base_coder.py +++ b/cecli/coders/base_coder.py @@ -1597,7 +1597,7 @@ async def output_task(self, preproc): self.io.output_task = asyncio.create_task(self.generate(user_message, preproc)) # Start spinner for output task - self.io.start_spinner("Processing...") + self.io.start_spinner("Processing...", coder_uuid=getattr(self, 'uuid', None)) await self.io.recreate_input() # Monitor output task @@ -2365,7 +2365,7 @@ async def format_in_executor(): if not self.tui: spinner_text += f" • 
${self.format_cost(self.total_cost)} session" - self.io.start_spinner(spinner_text) + self.io.start_spinner(spinner_text, coder_uuid=getattr(self, 'uuid', None)) if self.stream: self.mdstream = True else: @@ -2452,9 +2452,7 @@ async def format_in_executor(): self.mdstream = None # Ensure any waiting spinner is stopped - self.io.start_spinner("Processing Answer...") - - self.partial_response_content = self.get_multi_response_content_in_progress(True) + self.io.start_spinner("Processing Answer...", coder_uuid=getattr(self, 'uuid', None)) self.remove_reasoning_content() self.multi_response_content = "" diff --git a/cecli/tui/app.py b/cecli/tui/app.py index 730b35d9a74..ccbd3162a0c 100644 --- a/cecli/tui/app.py +++ b/cecli/tui/app.py @@ -807,15 +807,18 @@ def on_input_area_submit(self, message: InputArea.Submit): # Update footer to show processing footer = self.query_one(MainFooter) - footer.start_spinner("Processing...") - + coder = self.worker.coder + # Determine which coder is in the foreground for input routing + foreground_coder = AgentService.get_instance(coder).foreground_coder + coder_uuid = str(foreground_coder.uuid) if foreground_coder and hasattr(foreground_coder, "uuid") else None + agent_name = self._resolve_agent_name(coder_uuid) + + footer.start_spinner("Processing...", agent_name=agent_name or "") if coder: - coder.io.start_spinner("Processing...") + coder.io.start_spinner("Processing...", coder_uuid=coder_uuid) - # Determine which coder is in the foreground for input routing - foreground_coder = AgentService.get_instance(coder).foreground_coder if coder and is_active(getattr(coder.io, "output_task", None)): from cecli.helpers.conversation import ConversationService, MessageTag From baffb29cadcfde721f9735cb691eefefc3d3de64 Mon Sep 17 00:00:00 2001 From: Your Name Date: Fri, 29 May 2026 14:17:41 -0700 Subject: [PATCH 14/37] fix: Prefix primary agent status when sub-agents exist Co-authored-by: cecli (openai/agentic) --- cecli/tui/app.py | 2 ++ 1 file 
changed, 2 insertions(+) diff --git a/cecli/tui/app.py b/cecli/tui/app.py index ccbd3162a0c..e065fb90449 100644 --- a/cecli/tui/app.py +++ b/cecli/tui/app.py @@ -579,6 +579,8 @@ def _resolve_agent_name(self, coder_uuid: str | None) -> str | None: agent_service = AgentService.get_instance(self.worker.coder) primary_uuid = str(agent_service.coder.uuid) if coder_uuid == primary_uuid: + if agent_service.sub_agents: + return "primary" return None # Primary agent gets no prefix for info in agent_service.sub_agents.values(): if str(info.coder.uuid) == coder_uuid: From 79e99db640683816c55aeac9da658ddde11072cb Mon Sep 17 00:00:00 2001 From: Your Name Date: Fri, 29 May 2026 15:38:34 -0700 Subject: [PATCH 15/37] fix: Add UUID disambiguation for duplicate agent names Co-authored-by: cecli (openai/agentic) --- cecli/tui/app.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/cecli/tui/app.py b/cecli/tui/app.py index e065fb90449..6a1f6a0b041 100644 --- a/cecli/tui/app.py +++ b/cecli/tui/app.py @@ -569,7 +569,11 @@ def _resolve_agent_name(self, coder_uuid: str | None) -> str | None: """Resolve an agent display name from a coder_uuid. Returns the sub-agent's name if the coder_uuid belongs to a known - sub-agent, otherwise None (primary agent uses no prefix). + sub-agent. For the primary agent, returns "primary" if sub-agents + exist, otherwise None. + + If multiple sub-agents share the same name, disambiguates by + appending the first 3 characters of the UUID in parentheses. 
""" if not coder_uuid: return None @@ -584,6 +588,15 @@ def _resolve_agent_name(self, coder_uuid: str | None) -> str | None: return None # Primary agent gets no prefix for info in agent_service.sub_agents.values(): if str(info.coder.uuid) == coder_uuid: + # Check for duplicate names among sub-agents + name_count = sum( + 1 for i in agent_service.sub_agents.values() + if i.name == info.name + ) + if name_count > 1: + # Disambiguate with first 3 UUID characters + short_uuid = str(info.coder.uuid)[:3] + return f"{info.name} ({short_uuid})" return info.name except (AttributeError, ImportError, KeyError): # Agent service not available or coder not yet initialized @@ -809,7 +822,7 @@ def on_input_area_submit(self, message: InputArea.Submit): # Update footer to show processing footer = self.query_one(MainFooter) - + coder = self.worker.coder # Determine which coder is in the foreground for input routing foreground_coder = AgentService.get_instance(coder).foreground_coder From 3929bfb2f972938dc3a4837674cc83d6d9f064c2 Mon Sep 17 00:00:00 2001 From: Your Name Date: Fri, 29 May 2026 17:34:06 -0700 Subject: [PATCH 16/37] refactor: Add coder_uuid to TUI messages and tests --- cecli/tui/io.py | 11 +++-------- cecli/utils.py | 3 +-- tests/tui/test_app.py | 39 +++++++++++++++++++++++---------------- 3 files changed, 27 insertions(+), 26 deletions(-) diff --git a/cecli/tui/io.py b/cecli/tui/io.py index ff20b443de0..36dcb6a543f 100644 --- a/cecli/tui/io.py +++ b/cecli/tui/io.py @@ -408,14 +408,7 @@ def stop_spinner(self, **kwargs): super().stop_spinner() # Send to TUI - self.output_queue.put( - { - "type": "spinner", - "action": "stop", - } - "coder_uuid": coder_uuid, - "coder_uuid": coder_uuid, - ) + self.output_queue.put({"type": "spinner", "action": "stop", "coder_uuid": coder_uuid}) def interrupt_input(self): self.interrupted = True @@ -531,6 +524,7 @@ async def confirm_ask( allow_never=False, allow_tweak=False, acknowledge=False, + coder_uuid=None, ): """Override confirm_ask to 
show modal instead of inline prompt. @@ -607,6 +601,7 @@ async def confirm_ask( "acknowledge": acknowledge, "valid_responses": valid_responses, }, + "coder_uuid": coder_uuid, } ) diff --git a/cecli/utils.py b/cecli/utils.py index ebb928ad91b..b8a009c07eb 100644 --- a/cecli/utils.py +++ b/cecli/utils.py @@ -2,14 +2,13 @@ import json import os import platform +import shlex import shutil import subprocess import sys import tempfile from pathlib import Path -import shlex - from cecli.dump import dump # noqa: F401 from cecli.waiting import Spinner diff --git a/tests/tui/test_app.py b/tests/tui/test_app.py index 22d1eed160a..5d008b93ad3 100644 --- a/tests/tui/test_app.py +++ b/tests/tui/test_app.py @@ -37,8 +37,6 @@ def test_on_mouse_move_linux(tui_instance): mock_event.stop.assert_not_called() - - def test_handle_output_message_spinner_with_agent_name(tui_instance, monkeypatch): """ Test that spinner status messages display the agent name prefix @@ -95,21 +93,20 @@ def mock_query_one(selector, *args): # Test: sub-agent spinner should include agent_name="researcher" msg = { - "type": "spinner", "action": "start", "text": "Thinking...", + "type": "spinner", + "action": "start", + "text": "Thinking...", "coder_uuid": "some_uuid", } tui_instance.handle_output_message(msg) - mock_footer.start_spinner.assert_called_once_with( - "Thinking...", agent_name="researcher" - ) + mock_footer.start_spinner.assert_called_once_with("Thinking...", agent_name="researcher") # Test: primary agent spinner should have agent_name=None mock_footer.reset_mock() msg["coder_uuid"] = "primary_uuid" tui_instance.handle_output_message(msg) - mock_footer.start_spinner.assert_called_once_with( - "Thinking...", agent_name=None - ) + mock_footer.start_spinner.assert_called_once_with("Thinking...", agent_name=None) + def test_handle_output_message_confirmation_with_agent_name(tui_instance, monkeypatch): """ @@ -168,16 +165,23 @@ def mock_query_one(selector, *args): # Test: sub-agent confirmation should 
include agent_name="researcher" msg = { - "type": "confirmation", "question": "Are you sure?", - "options": {}, "coder_uuid": "some_uuid", + "type": "confirmation", + "question": "Are you sure?", + "options": {}, + "coder_uuid": "some_uuid", } tui_instance.handle_output_message(msg) mock_status_bar.show_confirm.assert_called_once_with( - "Are you sure?", show_all=False, allow_tweak=False, - allow_never=False, default="y", - explicit_yes_required=False, agent_name="researcher", + "Are you sure?", + show_all=False, + allow_tweak=False, + allow_never=False, + default="y", + explicit_yes_required=False, + agent_name="researcher", ) + def test_handle_output_message_error_with_agent_name(tui_instance, monkeypatch): """ Test that error status messages display the agent name prefix. @@ -224,11 +228,14 @@ def mock_query_one(selector, *args): # Test: error message for unknown agent should have agent_name=None msg = { - "type": "error", "message": "Something went wrong!", + "type": "error", + "message": "Something went wrong!", "coder_uuid": "unknown_uuid", } tui_instance.handle_output_message(msg) mock_status_bar.show_notification.assert_called_once_with( - "Something went wrong!", severity="error", timeout=5, + "Something went wrong!", + severity="error", + timeout=5, agent_name=None, ) From 5c6936f1f0e436a244ce2bb746ed8564145e9831 Mon Sep 17 00:00:00 2001 From: Your Name Date: Fri, 29 May 2026 17:36:46 -0700 Subject: [PATCH 17/37] cli-39: fixed linting --- cecli/coders/base_coder.py | 6 +++--- cecli/tui/app.py | 12 +++++++----- cecli/tui/widgets/footer.py | 1 + cecli/tui/widgets/status_bar.py | 6 +++++- 4 files changed, 16 insertions(+), 9 deletions(-) diff --git a/cecli/coders/base_coder.py b/cecli/coders/base_coder.py index 6b189419cc6..ff3dd190ea5 100755 --- a/cecli/coders/base_coder.py +++ b/cecli/coders/base_coder.py @@ -1597,7 +1597,7 @@ async def output_task(self, preproc): self.io.output_task = asyncio.create_task(self.generate(user_message, preproc)) # Start 
spinner for output task - self.io.start_spinner("Processing...", coder_uuid=getattr(self, 'uuid', None)) + self.io.start_spinner("Processing...", coder_uuid=getattr(self, "uuid", None)) await self.io.recreate_input() # Monitor output task @@ -2365,7 +2365,7 @@ async def format_in_executor(): if not self.tui: spinner_text += f" • ${self.format_cost(self.total_cost)} session" - self.io.start_spinner(spinner_text, coder_uuid=getattr(self, 'uuid', None)) + self.io.start_spinner(spinner_text, coder_uuid=getattr(self, "uuid", None)) if self.stream: self.mdstream = True else: @@ -2452,7 +2452,7 @@ async def format_in_executor(): self.mdstream = None # Ensure any waiting spinner is stopped - self.io.start_spinner("Processing Answer...", coder_uuid=getattr(self, 'uuid', None)) + self.io.start_spinner("Processing Answer...", coder_uuid=getattr(self, "uuid", None)) self.remove_reasoning_content() self.multi_response_content = "" diff --git a/cecli/tui/app.py b/cecli/tui/app.py index 6a1f6a0b041..35dca51b64e 100644 --- a/cecli/tui/app.py +++ b/cecli/tui/app.py @@ -564,7 +564,6 @@ def handle_output_message(self, msg): else: self._switch_to_container(target_uuid) - def _resolve_agent_name(self, coder_uuid: str | None) -> str | None: """Resolve an agent display name from a coder_uuid. 
@@ -590,8 +589,7 @@ def _resolve_agent_name(self, coder_uuid: str | None) -> str | None: if str(info.coder.uuid) == coder_uuid: # Check for duplicate names among sub-agents name_count = sum( - 1 for i in agent_service.sub_agents.values() - if i.name == info.name + 1 for i in agent_service.sub_agents.values() if i.name == info.name ) if name_count > 1: # Disambiguate with first 3 UUID characters @@ -602,6 +600,7 @@ def _resolve_agent_name(self, coder_uuid: str | None) -> str | None: # Agent service not available or coder not yet initialized pass return None + def add_output(self, text, task_id=None): """Add output to the output container.""" output_container = self.query_one("#output", OutputContainer) @@ -826,7 +825,11 @@ def on_input_area_submit(self, message: InputArea.Submit): coder = self.worker.coder # Determine which coder is in the foreground for input routing foreground_coder = AgentService.get_instance(coder).foreground_coder - coder_uuid = str(foreground_coder.uuid) if foreground_coder and hasattr(foreground_coder, "uuid") else None + coder_uuid = ( + str(foreground_coder.uuid) + if foreground_coder and hasattr(foreground_coder, "uuid") + else None + ) agent_name = self._resolve_agent_name(coder_uuid) footer.start_spinner("Processing...", agent_name=agent_name or "") @@ -834,7 +837,6 @@ def on_input_area_submit(self, message: InputArea.Submit): if coder: coder.io.start_spinner("Processing...", coder_uuid=coder_uuid) - if coder and is_active(getattr(coder.io, "output_task", None)): from cecli.helpers.conversation import ConversationService, MessageTag diff --git a/cecli/tui/widgets/footer.py b/cecli/tui/widgets/footer.py index c80739c5ca0..b0e1b0e6534 100644 --- a/cecli/tui/widgets/footer.py +++ b/cecli/tui/widgets/footer.py @@ -210,6 +210,7 @@ def stop_spinner(self): self.spinner_text = "" self.agent_name = "" self.refresh() + def _has_running_sub_agent(self) -> bool: """Check if any agent is currently generating output.""" try: diff --git 
a/cecli/tui/widgets/status_bar.py b/cecli/tui/widgets/status_bar.py index 708a3467965..b198a30cb27 100644 --- a/cecli/tui/widgets/status_bar.py +++ b/cecli/tui/widgets/status_bar.py @@ -134,6 +134,7 @@ def __init__(self, **kwargs): self._default = "y" self._explicit_yes_required = False self._timer = None + def compose(self) -> ComposeResult: """Create empty container - content added dynamically.""" yield Horizontal(classes="status-content") @@ -171,7 +172,10 @@ def _rebuild_content(self) -> None: hints.mount(Static("\\[d]on't ask again", classes="hint hint-never")) def show_notification( - self, text: str, severity: str = "info", timeout: float | None = 3.0, + self, + text: str, + severity: str = "info", + timeout: float | None = 3.0, agent_name: str | None = None, ) -> None: """Show a transient notification message. From 4c7aba03511757111bbeae08ba3177da69d5a37f Mon Sep 17 00:00:00 2001 From: Your Name Date: Fri, 29 May 2026 19:31:40 -0700 Subject: [PATCH 18/37] fix: Uncomment conversation promotion and ensure agent_name is string Co-authored-by: cecli (openai/gemini_cli_local/gemini-2.5-pro) --- cecli/coders/base_coder.py | 8 ++++---- cecli/tui/app.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/cecli/coders/base_coder.py b/cecli/coders/base_coder.py index ff3dd190ea5..b8e701a35d8 100755 --- a/cecli/coders/base_coder.py +++ b/cecli/coders/base_coder.py @@ -2921,8 +2921,8 @@ async def process_tool_calls(self, tool_call_response): message_dict=tool_response, tag=MessageTag.CUR, hash_key=(tool_response["tool_call_id"], str(time.monotonic_ns())), - # promotion=ConversationService.get_manager(self).DEFAULT_TAG_PROMOTION_VALUE, - # mark_for_demotion=1, + promotion=ConversationService.get_manager(self).DEFAULT_TAG_PROMOTION_VALUE, + mark_for_demotion=1, ) return bool(tool_responses) @@ -3135,8 +3135,8 @@ async def add_assistant_reply_to_cur_messages(self): message_dict=msg, tag=MessageTag.CUR, hash_key=("assistant_message", str(msg), 
str(time.monotonic_ns())), - # promotion=ConversationService.get_manager(self).DEFAULT_TAG_PROMOTION_VALUE, - # mark_for_demotion=1, + promotion=ConversationService.get_manager(self).DEFAULT_TAG_PROMOTION_VALUE, + mark_for_demotion=1, ) def get_file_mentions(self, content, ignore_current=False): diff --git a/cecli/tui/app.py b/cecli/tui/app.py index 35dca51b64e..86ac5090d7b 100644 --- a/cecli/tui/app.py +++ b/cecli/tui/app.py @@ -702,7 +702,7 @@ def update_spinner(self, msg, agent_name: str | None = None): action = msg.get("action", "start") if action == "start": - footer.start_spinner(msg.get("text", ""), agent_name=agent_name) + footer.start_spinner(msg.get("text", ""), agent_name=agent_name or "") elif action == "update": footer.spinner_text = msg.get("text", "") elif action == "update_suffix": From 5d59ec163168b2f8b99179ce0ab52a165ca4af2c Mon Sep 17 00:00:00 2001 From: Your Name Date: Fri, 29 May 2026 19:44:51 -0700 Subject: [PATCH 19/37] fix: Improve footer widget robustness in test environments Co-authored-by: cecli (openai/gemini_cli_local/gemini-2.5-pro) --- cecli/tui/widgets/footer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cecli/tui/widgets/footer.py b/cecli/tui/widgets/footer.py index b0e1b0e6534..5f77cdae230 100644 --- a/cecli/tui/widgets/footer.py +++ b/cecli/tui/widgets/footer.py @@ -79,7 +79,7 @@ def _get_display_model(self) -> str: else: name = coder.get_active_model().name except Exception: - name = self.app.worker.coder.get_active_model().name + name = self.model_name # Strip common prefixes like "openrouter/x-ai/" if len(name) > 40: From 9b6f1eebc88449ee885b22f1b92dbcd0710c6135 Mon Sep 17 00:00:00 2001 From: Your Name Date: Fri, 29 May 2026 20:04:12 -0700 Subject: [PATCH 20/37] fix: Improve agent name resolution and conversation history handling Co-authored-by: cecli (openai/gemini_cli_local/gemini-2.5-pro) --- cecli/coders/base_coder.py | 8 ++++---- cecli/tui/app.py | 12 +++++++++++- 2 files changed, 15 insertions(+), 5 
deletions(-) diff --git a/cecli/coders/base_coder.py b/cecli/coders/base_coder.py index b8e701a35d8..ff3dd190ea5 100755 --- a/cecli/coders/base_coder.py +++ b/cecli/coders/base_coder.py @@ -2921,8 +2921,8 @@ async def process_tool_calls(self, tool_call_response): message_dict=tool_response, tag=MessageTag.CUR, hash_key=(tool_response["tool_call_id"], str(time.monotonic_ns())), - promotion=ConversationService.get_manager(self).DEFAULT_TAG_PROMOTION_VALUE, - mark_for_demotion=1, + # promotion=ConversationService.get_manager(self).DEFAULT_TAG_PROMOTION_VALUE, + # mark_for_demotion=1, ) return bool(tool_responses) @@ -3135,8 +3135,8 @@ async def add_assistant_reply_to_cur_messages(self): message_dict=msg, tag=MessageTag.CUR, hash_key=("assistant_message", str(msg), str(time.monotonic_ns())), - promotion=ConversationService.get_manager(self).DEFAULT_TAG_PROMOTION_VALUE, - mark_for_demotion=1, + # promotion=ConversationService.get_manager(self).DEFAULT_TAG_PROMOTION_VALUE, + # mark_for_demotion=1, ) def get_file_mentions(self, content, ignore_current=False): diff --git a/cecli/tui/app.py b/cecli/tui/app.py index 86ac5090d7b..9d151bff074 100644 --- a/cecli/tui/app.py +++ b/cecli/tui/app.py @@ -577,19 +577,29 @@ def _resolve_agent_name(self, coder_uuid: str | None) -> str | None: if not coder_uuid: return None try: + if not self.worker or not self.worker.coder: + return None # Cannot resolve without a coder from cecli.helpers.agents.service import AgentService agent_service = AgentService.get_instance(self.worker.coder) + if not agent_service: + return None primary_uuid = str(agent_service.coder.uuid) if coder_uuid == primary_uuid: if agent_service.sub_agents: return "primary" return None # Primary agent gets no prefix + if not agent_service.sub_agents: + return None for info in agent_service.sub_agents.values(): + if not info or not info.coder: + continue if str(info.coder.uuid) == coder_uuid: # Check for duplicate names among sub-agents name_count = sum( - 1 for i in 
agent_service.sub_agents.values() if i.name == info.name + 1 + for i in agent_service.sub_agents.values() + if i and hasattr(i, "name") and i.name == info.name ) if name_count > 1: # Disambiguate with first 3 UUID characters From 6d899523c5e6b78be5512c93b50dc04aa2d0a976 Mon Sep 17 00:00:00 2001 From: Your Name Date: Sat, 30 May 2026 12:46:59 -0400 Subject: [PATCH 21/37] - Make `Delegate` tool non-blocking - Change `Finished` tool to `Yield` tool --- cecli/coders/agent_coder.py | 47 ++++++- cecli/helpers/agents/service.py | 143 ++++++++++++++++++-- cecli/helpers/conversation/integration.py | 30 +++++ cecli/prompts/agent.yml | 2 +- cecli/prompts/subagent.yml | 7 +- cecli/tools/__init__.py | 4 +- cecli/tools/_yield.py | 153 ++++++++++++++++++++++ cecli/tools/delegate.py | 43 +++--- cecli/tools/finished.py | 86 ------------ cecli/tools/utils/registry.py | 2 +- cecli/website/docs/config/agent-mode.md | 6 +- cecli/website/docs/config/subagents.md | 2 +- tests/subagents/test_commands.py | 109 +-------------- tests/subagents/test_delegate.py | 58 +++++--- tests/subagents/test_finished.py | 21 ++- tests/subagents/test_service.py | 67 ++++++---- tests/tools/test_registry.py | 16 +-- 17 files changed, 486 insertions(+), 310 deletions(-) create mode 100644 cecli/tools/_yield.py delete mode 100644 cecli/tools/finished.py diff --git a/cecli/coders/agent_coder.py b/cecli/coders/agent_coder.py index 9c5e5816a03..822e640d0b5 100644 --- a/cecli/coders/agent_coder.py +++ b/cecli/coders/agent_coder.py @@ -540,6 +540,10 @@ def format_chat_chunks(self): # Add post-message context blocks (priority 250 - between CUR and REMINDER) ConversationService.get_chunks(self).add_post_message_context_blocks() + + # Add sub-agent states context block (same priority as post-message blocks) + ConversationService.get_chunks(self).add_sub_agent_states() + ConversationService.get_chunks(self).add_randomized_cta() return ConversationService.get_manager(self).get_messages_dict() @@ -938,7 +942,7 @@ async 
def reply_completed(self): if self.tool_call_vectors: if content and not tool_calls_found and self.num_reflections < self.max_reflections: self.reflected_message = ( - "Continue with your task. If you have completed it, call the `Finished` tool." + "Continue with your task. If you have completed it, call the `Yield` tool." ) return True @@ -1490,12 +1494,53 @@ def get_sub_agents_context(self): result += "\n" result += "Use the `Delegate` tool with the sub-agent name to delegate tasks.\n" + result += "Use the `Yield` tool to wait for responses for all active sub agents.\n" result += "" return result except Exception as e: self.io.tool_error(f"Error generating sub-agents context: {str(e)}") return None + def get_child_agent_states(self): + """Get the state of all active child sub-agents. + + Returns a formatted context block with each child sub-agent's name, + UUID, and current status, or None if no children exist. + This is used by ConversationChunks.add_sub_agent_states() to provide + the model with visibility into active sub-agent states. 
+ """ + if not self.use_enhanced_context: + return None + + # Sub-agents should only see child states when nested delegation is enabled + if hasattr(self, "parent_uuid") and self.parent_uuid: + if not self.agent_config.get("allow_nested_delegation", False): + return None + + try: + service = AgentService.get_instance(self) + children = service.get_children(self) + + if not children: + return None + + result = '\n' + result += "## Active Sub-Agent States\n\n" + result += f"Found {len(children)} active child sub-agent(s):\n\n" + + for info in children: + result += f"**{info.name}**:\n" + result += f" - UUID: `{info.coder.uuid}`\n" + result += f" - Status: {info.status.value}\n" + if info.error: + result += f" - Error: {info.error}\n" + result += "\n" + result += "" + return result + except Exception as e: + self.io.tool_error(f"Error generating child agent states: {str(e)}") + return None + def get_background_command_output(self): """ Get background command output to append after the main message. diff --git a/cecli/helpers/agents/service.py b/cecli/helpers/agents/service.py index fc131030daa..34fe637901b 100644 --- a/cecli/helpers/agents/service.py +++ b/cecli/helpers/agents/service.py @@ -6,6 +6,7 @@ import asyncio import logging +import time import weakref from dataclasses import dataclass from enum import Enum @@ -59,6 +60,10 @@ class AgentService: _global_registry: Dict[str, Any] = {} # name -> SubAgentConfig (from .md files) # UUID -> weakref of coder instance for convenient lookup _uuid_coder_map: Dict[str, weakref.ref] = {} + # Lock pools keyed by parent UUID — created lazily so only parents that + # actually use them allocate a lock. 
+ _spawn_locks: Dict[str, asyncio.Lock] = {} + _conversation_locks: Dict[str, asyncio.Lock] = {} # ------------------------------------------------------------------ # # Singleton @@ -198,6 +203,13 @@ def max_sub_agents(self) -> int: # ------------------------------------------------------------------ # # Internal helpers + @classmethod + def _get_lock(cls, pool: Dict[str, asyncio.Lock], uuid: str) -> asyncio.Lock: + """Return a lock for *uuid* from *pool*, creating one if absent.""" + if uuid not in pool: + pool[uuid] = asyncio.Lock() + return pool[uuid] + @staticmethod def _get_tui(coder: Any) -> Any: """Dereference the TUI weakref from a coder, returning None if unavailable. @@ -335,12 +347,18 @@ async def _create_sub_agent_coder( f"Unknown sub-agent '{name}'. " f"Available: {list(self._global_registry.keys())}" ) - self._check_max_sub_agents() + # Critical section: max-sub-agent check and registration must be atomic + # to prevent TOCTOU race when multiple spawns fire concurrently. + # Coder.create() is called *outside* the lock to avoid holding an + # await across a lock (which risks deadlock if Coder.create() ever + # tried to acquire the same lock). + parent_coder = parent if parent is not None else self.coder - from cecli.coders import Coder + async with self._get_lock(self._spawn_locks, parent_coder.uuid): + self._check_max_sub_agents() + new_uuid = str(uuid4()) - parent_coder = parent if parent is not None else self.coder - new_uuid = str(uuid4()) + from cecli.coders import Coder kwargs = dict( io=parent_coder.io, @@ -362,18 +380,23 @@ async def _create_sub_agent_coder( new_coder = await Coder.create(**kwargs) # IOProxy wrapping is handled by base_coder.py's Coder.__init__ - # Register in global coder lookup - self._uuid_coder_map[new_uuid] = weakref.ref(new_coder) + # Re-acquire the lock to register — we must re-check max agents since + # the lock was released and other spawns may have registered in between. 
+ async with self._get_lock(self._spawn_locks, parent_coder.uuid): + self._check_max_sub_agents() - info = SubAgentInfo( - name=name, - coder=new_coder, - parent_uuid=parent_coder.uuid, - status=SubAgentStatus.CREATED, - ) + # Register in global coder lookup + self._uuid_coder_map[new_uuid] = weakref.ref(new_coder) + + info = SubAgentInfo( + name=name, + coder=new_coder, + parent_uuid=parent_coder.uuid, + status=SubAgentStatus.CREATED, + ) - self.sub_agents[new_coder.uuid] = info - self._sub_agent_order.append(new_coder.uuid) + self.sub_agents[new_coder.uuid] = info + self._sub_agent_order.append(new_coder.uuid) # Notify TUI to create a container try: @@ -432,6 +455,21 @@ def start_generate_task(self, info: SubAgentInfo, user_message: str) -> asyncio. for cancellation/monitoring. Args: + + .. note:: + + **Ordering dependency with mark_sub_agent_finished()** + + ``mark_sub_agent_finished()`` (called *synchronously* inside the tool + execution pipeline of ``generate()``) writes ``info.status`` and + ``info.summary`` before ``generate()`` returns to this task. + + The ``if info.status == SubAgentStatus.RUNNING:`` guard below correctly + prevents the task from overwriting those values with defaults. + + This ordering is currently safe because tool execution is synchronous. + If tool execution is refactored to introduce interleaved ``await`` points, + this dependency would break and an ``asyncio.Event`` would be needed. info: The SubAgentInfo for the sub-agent. user_message: The user message to pass to ``generate()``. 
@@ -446,10 +484,12 @@ async def _run_generate(): if info.status == SubAgentStatus.RUNNING: info.status = SubAgentStatus.FINISHED info.summary = info.summary or DEFAULT_SUMMARY_COMPLETED + await self._inject_sub_agent_result(info) except asyncio.CancelledError: info.status = SubAgentStatus.FINISHED info.summary = info.summary or DEFAULT_SUMMARY_INTERRUPTED logger.debug("Sub-agent %s generate cancelled (interrupted)", info.name) + await self._inject_sub_agent_result(info) raise except Exception as exc: info.status = SubAgentStatus.ERROR @@ -460,6 +500,7 @@ async def _run_generate(): exc, exc_info=True, ) + await self._inject_sub_agent_result(info) raise # Cancel any previous generate task to prevent duplicate concurrent generates @@ -468,8 +509,79 @@ async def _run_generate(): task = asyncio.create_task(_run_generate()) info.generate_task = task + # Suppress "Task exception was never retrieved" for fire-and-forget tasks + task.add_done_callback(lambda t: t.exception() if not t.cancelled() else None) return task + async def _inject_sub_agent_result(self, info: SubAgentInfo) -> None: + """Inject the sub-agent's result (summary/error) into the parent's conversation. + + Adds a user message with the result content and an assistant acknowledgment + so the parent coder (and therefore the LLM) sees what the sub-agent produced. + Uses unique hash keys so multiple sub-agent completions don't overwrite each other. + """ + from cecli.helpers.conversation.service import ConversationService + from cecli.helpers.conversation.tags import MessageTag + + # Capture coder UUID early in case the sub-agent is cleaned up before + # this method completes (the weakref could become invalid). 
+ coder_uuid = getattr(info.coder, "uuid", "(unknown)") + + parent_coder_ref = self._uuid_coder_map.get(info.parent_uuid) + if not parent_coder_ref: + return + + parent_coder = parent_coder_ref() + if not parent_coder: + return + + if info.status == SubAgentStatus.ERROR: + user_content = ( + f"The **{info.name}** agent (`{coder_uuid}`) encountered an error:\n" + f"{info.error}" + ) + assistant_content = ( + f"The {info.name} agent `{coder_uuid}` failed with the error above. " + f"You may want to review or retry the delegation." + ) + elif info.status == SubAgentStatus.FINISHED: + is_interrupted = info.summary == DEFAULT_SUMMARY_INTERRUPTED + summary_text = info.summary or DEFAULT_SUMMARY_COMPLETED + if is_interrupted: + user_content = ( + f"The **{info.name}** agent (`{coder_uuid}`) was interrupted:\n" + f"{summary_text}" + ) + assistant_content = ( + f"The {info.name} agent `{coder_uuid}` was interrupted before completing its task. " + f"You may want to review or retry the delegation." + ) + else: + user_content = ( + f"The **{info.name}** agent (`{coder_uuid}`) completed with the following summary:\n" + f"{summary_text}" + ) + assistant_content = ( + f"Thank you for sharing the summary for {info.name} agent `{coder_uuid}`. " + f"The agent has finished its task." 
+ ) + else: + return + + async with self._get_lock(self._conversation_locks, info.parent_uuid): + ConversationService.get_manager(parent_coder).add_message( + message_dict={"role": "user", "content": user_content}, + tag=MessageTag.CUR, + hash_key=("sub_agent_result", "user", coder_uuid, str(time.monotonic_ns())), + force=True, + ) + ConversationService.get_manager(parent_coder).add_message( + message_dict={"role": "assistant", "content": assistant_content}, + tag=MessageTag.CUR, + hash_key=("sub_agent_result", "assistant", coder_uuid, str(time.monotonic_ns())), + force=True, + ) + async def invoke( self, name: str, prompt: str, blocking: bool = True, parent: Any = None ) -> Optional[str]: @@ -610,4 +722,7 @@ def cleanup_all_for_parent(self) -> None: """Clean up all sub-agents when the parent session ends.""" for uuid in list(self.sub_agents.keys()): self._cleanup_sub_agent(uuid) + # Clean up lock pools to prevent memory leaks + self._spawn_locks.pop(self.coder.uuid, None) + self._conversation_locks.pop(self.coder.uuid, None) self._instances.pop(self.coder.uuid, None) diff --git a/cecli/helpers/conversation/integration.py b/cecli/helpers/conversation/integration.py index 3c5796c1139..f5c046728e2 100644 --- a/cecli/helpers/conversation/integration.py +++ b/cecli/helpers/conversation/integration.py @@ -1013,6 +1013,36 @@ def add_post_message_context_blocks(self) -> None: force=True, ) + def add_sub_agent_states(self) -> None: + """ + Add sub-agent states context block to conversation (priority 250). + + Sub-agent states include: name, UUID, and status (CREATED, RUNNING, + FINISHED, ERROR) of each active child sub-agent. 
+ """ + coder = self.get_coder() + if not coder: + return + + if not hasattr(coder, "use_enhanced_context") or not coder.use_enhanced_context: + return + + if not hasattr(coder, "get_child_agent_states"): + return + + block = coder.get_child_agent_states() + if not block: + return + + ConversationService.get_manager(coder).add_message( + message_dict={"role": "user", "content": block}, + tag=MessageTag.STATIC, + priority=DEFAULT_TAG_PRIORITY[MessageTag.REMINDER] + 25, # After post_message blocks + mark_for_delete=0, + hash_key=("sub_agent_states",), + force=True, + ) + def defer_removal(self, file_path: str): self._deferred_removals.add(file_path) diff --git a/cecli/prompts/agent.yml b/cecli/prompts/agent.yml index 730e5975bac..988538838a5 100644 --- a/cecli/prompts/agent.yml +++ b/cecli/prompts/agent.yml @@ -46,7 +46,7 @@ main_system: | 2. **Explore**: Use discovery tools (`ExploreCode`, `Grep`, `Ls`) to research and gather understanding for you task. Modify search terms when errors are encountered. 3. **Execute**: Mark files as editable with `ContextManager` before attempting edits. Proactively use skills if they are available. Review diff outputs after edit to ensure the proper changes were made. 4. **Verify & Recover**: If an edit fails or introduces linting errors, use `UndoChange` immediately. - 5. **Finished**: Use the `Finished` tool only after verifying the solution. Briefly summarize the changes for the user. + 5. **Yield**: Use the `Yield` tool only after verifying the solution. Briefly summarize the changes for the user. ## Todo List Management - Break complex goals into meaningful sub-tasks so the problem remains tractable diff --git a/cecli/prompts/subagent.yml b/cecli/prompts/subagent.yml index a260dc9a5f3..1339499995c 100644 --- a/cecli/prompts/subagent.yml +++ b/cecli/prompts/subagent.yml @@ -31,7 +31,7 @@ main_system: | 2. **Explore**: Use discovery tools (`ExploreCode`, `Grep`, `Ls`) to research and gather understanding for you task. 
Modify search terms when errors are encountered. 3. **Execute**: Mark files as editable with `ContextManager` before attempting edits. Proactively use skills if they are available. Review diff outputs after edit to ensure the proper changes were made. 4. **Verify & Recover**: If an edit fails or introduces linting errors, use `UndoChange` immediately. - 5. **Finished**: Use the `Finished` tool only after verifying the solution. Briefly summarize the changes for the user. + 5. **Yield**: Use the `Yield` tool only after verifying the solution. Briefly summarize the changes for the user. ## Todo List Management - Break complex goals into meaningful sub-tasks so the problem remains tractable @@ -54,8 +54,9 @@ system_reminder: | - **Responses**: Reason out loud through the problem but be brief. **Finishing Up**: - Be very detailed in your `Finished` tool summary in describing your task, findings, efforts and results. - Include all of your final response inside the "summary" text so maximum information is available to the user. + Be very detailed in your `Yield` tool summary in describing your task, findings, efforts and results. + Include all of your final response inside the "summary" text. + Please be verbose so as much detail is made available to the user as possible. {lazy_prompt} {shell_cmd_reminder} diff --git a/cecli/tools/__init__.py b/cecli/tools/__init__.py index 07b1754aa31..44e527cff37 100644 --- a/cecli/tools/__init__.py +++ b/cecli/tools/__init__.py @@ -3,13 +3,13 @@ # Import all tool modules from . 
import ( + _yield, command, command_interactive, context_manager, delegate, edit_text, explore_code, - finished, git_branch, git_diff, git_log, @@ -34,7 +34,7 @@ delegate, edit_text, explore_code, - finished, + _yield, git_branch, git_diff, git_log, diff --git a/cecli/tools/_yield.py b/cecli/tools/_yield.py new file mode 100644 index 00000000000..c1372c1bdca --- /dev/null +++ b/cecli/tools/_yield.py @@ -0,0 +1,153 @@ +import asyncio +import json +import logging + +from cecli.tools.utils.base_tool import BaseTool +from cecli.tools.utils.output import color_markers, tool_footer, tool_header + +logger = logging.getLogger(__name__) + + +class Tool(BaseTool): + NORM_NAME = "yield" + TRACK_INVOCATIONS = False + SCHEMA = { + "type": "function", + "function": { + "name": "Yield", + "description": "Yield control back to the user, indicating all sub-goals are complete.", + "parameters": { + "type": "object", + "properties": { + "summary": { + "type": "string", + "description": ( + "Optional summary of what was accomplished. " + "When called by a sub-agent, this summary is captured " + "and returned to the parent agent." + ), + }, + }, + "required": [], + }, + }, + } + + @classmethod + async def execute(cls, coder, **kwargs): + """ + Mark that the current generation task needs no further effort. + + This gives the LLM explicit control over when it can stop looping + """ + cls.clear_invocation_cache() + + if coder: + # Check for active child sub-agents and await their tasks before finishing + try: + from cecli.helpers.agents.service import AgentService + + agent_service = AgentService.get_instance(coder) + children = agent_service.get_children(coder) + active_tasks = [ + info.generate_task + for info in children + if info.generate_task is not None and not info.generate_task.done() + ] + + if active_tasks: + coder.io.tool_warning( + f"Waiting for {len(active_tasks)} sub-agent(s) to complete before yielding..." 
+ ) + + # Single asyncio.wait that includes both the sub-agent tasks and + # the interrupt event, avoiding nested asyncio.wait() calls. + interrupt_event = coder.interrupt_event + if interrupt_event is None: + interrupt_event = asyncio.Event() + + interrupt_task = asyncio.create_task(interrupt_event.wait()) + pending = set(active_tasks) | {interrupt_task} + + while any(t in pending for t in active_tasks): + done, still_pending = await asyncio.wait( + pending, timeout=5.0, return_when=asyncio.FIRST_COMPLETED + ) + pending = still_pending + + if interrupt_task in done: + # Interrupted — cancel remaining sub-agent tasks + for t in pending: + t.cancel() + try: + await t + except (asyncio.CancelledError, Exception): + pass + return ( + "Yield interrupted while waiting for sub-agents. " + "Sub-agent outputs above may be incomplete." + ) + + # Retrieve exceptions from completed sub-agent tasks so they + # are not silently lost. + for t in done: + if t is not interrupt_task: + exc = t.exception() + if exc: + logger.warning("Sub-agent task raised an exception: %s", exc) + + # Cancel the interrupt task since we are done waiting + if not interrupt_task.done(): + interrupt_task.cancel() + try: + await interrupt_task + except asyncio.CancelledError: + pass + + # Fall through to the normal finishing flow below + except Exception as e: + logger.warning("Error awaiting child sub-agents before yield: %s", e) + + coder.agent_finished = True + + # If this is a sub-agent, capture the summary for the parent + summary = kwargs.get("summary", None) + parent_uuid = coder.parent_uuid + if parent_uuid: + try: + from cecli.helpers.agents.service import AgentService + + AgentService.mark_sub_agent_finished( + sub_coder_uuid=coder.uuid, + parent_uuid=parent_uuid, + summary=summary, + ) + except Exception: + pass + + if coder.files_edited_by_tools: + _ = await coder.auto_commit(coder.files_edited_by_tools) + coder.files_edited_by_tools = set() + + if summary: + return f"Yielded. 
Summary: {summary}" + return "Yielded." + + # coder.io.tool_Error("Error: Could not mark agent task as finished") + return "Error: Could not yield control" + + @classmethod + def format_output(cls, coder, mcp_server, tool_response): + color_start, color_end = color_markers(coder) + params = json.loads(tool_response.function.arguments) + + tool_header(coder=coder, mcp_server=mcp_server, tool_response=tool_response) + + summary = params.get("summary") + if summary: + coder.io.tool_output("") + coder.io.tool_output(f"{color_start}Summary:{color_end}") + coder.io.tool_output(summary) + coder.io.tool_output("") + + tool_footer(coder=coder, tool_response=tool_response) diff --git a/cecli/tools/delegate.py b/cecli/tools/delegate.py index e01cebc2e17..660db6c276d 100644 --- a/cecli/tools/delegate.py +++ b/cecli/tools/delegate.py @@ -65,37 +65,34 @@ async def execute(cls, coder, **kwargs): from cecli.helpers.agents.service import AgentService agent_service = AgentService.get_instance(coder) - # Track results with status flag instead of fragile emoji checks - results: list[tuple[bool, str]] = [] - async def _run_one(name: str, prompt: str) -> tuple[bool, str]: - """Run a single sub-agent and return a (success, formatted_message) tuple.""" + async def _spawn_one(name: str, prompt: str) -> tuple[str, str]: + """Spawn a single sub-agent and return (name, uuid_or_error).""" try: - agent_service._check_max_sub_agents() - summary = await agent_service.invoke(name, prompt, parent=coder, blocking=True) - if summary: - return True, f"Sub-agent '{name}' completed:\n{summary}" - return True, f"Sub-agent '{name}' completed (no summary)." 
- except (ValueError, RuntimeError) as e: - return False, f"Sub-agent '{name}' failed: {e}" + new_coder, info = await agent_service.spawn(name, prompt, parent=coder) + return name, info.coder.uuid except Exception as e: - return False, f"Sub-agent '{name}' failed with unexpected error: {e}" + return name, f"failed: {e}" - # Dispatch all delegations in parallel - tasks = [_run_one(d["name"], d["prompt"]) for d in delegations] + # Dispatch all delegations in parallel (spawn is fire-and-forget, but + # _create_sub_agent_coder is async so we gather for concurrency) + tasks = [_spawn_one(d["name"], d["prompt"]) for d in delegations] raw_results = await asyncio.gather(*tasks) - # Separate success flag from message - for success, msg in raw_results: - results.append((success, msg)) + started_agents: list[tuple[str, str]] = list(raw_results) # Build a consolidated report - n_ok = sum(1 for ok, _ in results if ok) - n_total = len(results) - separator = "\n" + "─" * 60 + "\n" - combined = separator.join(msg for _, msg in results) - - return f"📋 Delegation results ({n_ok}/{n_total} succeeded):" f"{separator}{combined}" + lines = [] + for name, result in started_agents: + if result.startswith("failed:"): + lines.append(f"❌ **{name}**: {result}") + else: + lines.append(f"✅ **{name}** agent started with id `{result}`") + + n_total = len(started_agents) + n_ok = sum(1 for _, r in started_agents if not r.startswith("failed:")) + combined = "\n".join(lines) + return f"📋 Delegation results ({n_ok}/{n_total} dispatched):\n{combined}" @classmethod def format_output(cls, coder, mcp_server, tool_response): diff --git a/cecli/tools/finished.py b/cecli/tools/finished.py deleted file mode 100644 index b099d1eca90..00000000000 --- a/cecli/tools/finished.py +++ /dev/null @@ -1,86 +0,0 @@ -import json - -from cecli.tools.utils.base_tool import BaseTool -from cecli.tools.utils.output import color_markers, tool_footer, tool_header - - -class Tool(BaseTool): - NORM_NAME = "finished" - 
TRACK_INVOCATIONS = False - SCHEMA = { - "type": "function", - "function": { - "name": "Finished", - "description": ( - "Declare that we are done with every single sub goal and no further work is needed." - ), - "parameters": { - "type": "object", - "properties": { - "summary": { - "type": "string", - "description": ( - "Optional summary of what was accomplished. " - "When called by a sub-agent, this summary is captured " - "and returned to the parent agent." - ), - }, - }, - "required": [], - }, - }, - } - - @classmethod - async def execute(cls, coder, **kwargs): - """ - Mark that the current generation task needs no further effort. - - This gives the LLM explicit control over when it can stop looping - """ - cls.clear_invocation_cache() - - if coder: - coder.agent_finished = True - - # If this is a sub-agent, capture the summary for the parent - summary = kwargs.get("summary", None) - parent_uuid = coder.parent_uuid - if parent_uuid: - try: - from cecli.helpers.agents.service import AgentService - - AgentService.mark_sub_agent_finished( - sub_coder_uuid=coder.uuid, - parent_uuid=parent_uuid, - summary=summary, - ) - except Exception: - pass - - if coder.files_edited_by_tools: - _ = await coder.auto_commit(coder.files_edited_by_tools) - coder.files_edited_by_tools = set() - - if summary: - return f"Task Finished! Summary: {summary}" - return "Task Finished!" 
- - # coder.io.tool_Error("Error: Could not mark agent task as finished") - return "Error: Could not mark agent task as finished" - - @classmethod - def format_output(cls, coder, mcp_server, tool_response): - color_start, color_end = color_markers(coder) - params = json.loads(tool_response.function.arguments) - - tool_header(coder=coder, mcp_server=mcp_server, tool_response=tool_response) - - summary = params.get("summary") - if summary: - coder.io.tool_output("") - coder.io.tool_output(f"{color_start}Summary:{color_end}") - coder.io.tool_output(summary) - coder.io.tool_output("") - - tool_footer(coder=coder, tool_response=tool_response) diff --git a/cecli/tools/utils/registry.py b/cecli/tools/utils/registry.py index 45f333301ff..fe0bffc8081 100644 --- a/cecli/tools/utils/registry.py +++ b/cecli/tools/utils/registry.py @@ -19,7 +19,7 @@ class ToolRegistry: """Registry for tool discovery and management.""" _tools: Dict[str, Type] = {} # normalized name -> Tool class - _essential_tools: Set[str] = {"contextmanager", "edittext", "finished"} + _essential_tools: Set[str] = {"contextmanager", "edittext", "yield"} _registry: Dict[str, Type] = {} # cached filtered registry loaded_custom_tools: List[str] = [] diff --git a/cecli/website/docs/config/agent-mode.md b/cecli/website/docs/config/agent-mode.md index 45c13d473c8..f6c29774c55 100644 --- a/cecli/website/docs/config/agent-mode.md +++ b/cecli/website/docs/config/agent-mode.md @@ -38,7 +38,7 @@ Agent Mode operates through a continuous loop where the LLM: 3. **Executes editing tools** to make changes 4. **Processes results** and continues exploration and editing until the task is complete -This loop continues automatically until the `Finished` tool is called, or the maximum number of iterations is reached. +This loop continues automatically until the `Yield` tool is called, or the maximum number of iterations is reached. 
### Key Components @@ -50,7 +50,7 @@ Agent Mode uses a centralized local tool registry that manages all available too - **Editing Tools**: `EditText`, - **Context Management Tools**: `ContextManager`, `GetLines` - **Git Tools**: `GitDiff`, `GitLog`, `GitShow`, `GitStatus` -- **Utility Tools**: `UpdateTodoList`, `UndoChange`, `Finished` +- **Utility Tools**: `UpdateTodoList`, `UndoChange`, `Yield` - **Skill Management**: `LoadSkill`, `RemoveSkill` - **Sub-Agent Tools**: `Delegate` - Delegate sub-tasks to specialized sub-agents @@ -138,7 +138,7 @@ Arguments: {} The above continues over and over until: ``` -Tool Call: Finished +Tool Call: Yield Arguments: {} ``` diff --git a/cecli/website/docs/config/subagents.md b/cecli/website/docs/config/subagents.md index 1d20b3ae6a8..ac223f24830 100644 --- a/cecli/website/docs/config/subagents.md +++ b/cecli/website/docs/config/subagents.md @@ -147,7 +147,7 @@ When the limit is reached: ### Cleanup -- **Normal completion**: A sub-agent calls `Finished(summary="...")` which marks it as finished. Its container remains visible but its resources are eligible for lazy cleanup. +- **Normal completion**: A sub-agent calls `Yield(summary="...")` which marks it as finished. Its container remains visible but its resources are eligible for lazy cleanup. - **Session end**: When the parent session ends, all sub-agents are automatically cleaned up. - **Force cleanup**: Use `/reap-agent` to immediately destroy a sub-agent and reclaim all resources. 
diff --git a/tests/subagents/test_commands.py b/tests/subagents/test_commands.py index c8d55914e57..c5210736a0b 100644 --- a/tests/subagents/test_commands.py +++ b/tests/subagents/test_commands.py @@ -7,113 +7,6 @@ import pytest -class TestInvokeAgentCommand: - """Tests for InvokeAgentCommand.""" - - @pytest.mark.asyncio - async def test_no_args_shows_usage(self): - """Empty args shows usage error.""" - from cecli.commands.invoke_agent import InvokeAgentCommand - - io = MagicMock() - await InvokeAgentCommand.execute(io, None, "") - - io.tool_error.assert_called_once() - assert "Usage" in io.tool_error.call_args[0][0] - - @pytest.mark.asyncio - async def test_name_only_no_prompt(self): - """Name without prompt passes empty string.""" - from cecli.commands.invoke_agent import InvokeAgentCommand - - io = MagicMock() - coder = MagicMock() - - with patch("cecli.helpers.agents.service.AgentService") as MockSvc: - mock_instance = MagicMock() - mock_instance.invoke = AsyncMock(return_value="ok") - MockSvc.get_instance.return_value = mock_instance - - await InvokeAgentCommand.execute(io, coder, "reviewer") - - mock_instance.invoke.assert_called_once_with("reviewer", "", blocking=True) - - @pytest.mark.asyncio - async def test_name_with_prompt(self): - """Name with prompt passes prompt correctly.""" - from cecli.commands.invoke_agent import InvokeAgentCommand - - io = MagicMock() - coder = MagicMock() - - with patch("cecli.helpers.agents.service.AgentService") as MockSvc: - mock_instance = MagicMock() - mock_instance.invoke = AsyncMock(return_value="done") - MockSvc.get_instance.return_value = mock_instance - - await InvokeAgentCommand.execute(io, coder, "reviewer review this") - - mock_instance.invoke.assert_called_once_with("reviewer", "review this", blocking=True) - - @pytest.mark.asyncio - async def test_value_error_shown_as_error(self): - """ValueError from service shown via io.tool_error.""" - from cecli.commands.invoke_agent import InvokeAgentCommand - - io = 
MagicMock() - coder = MagicMock() - - with patch("cecli.helpers.agents.service.AgentService") as MockSvc: - mock_instance = MagicMock() - mock_instance.invoke = AsyncMock(side_effect=ValueError("unknown")) - MockSvc.get_instance.return_value = mock_instance - - await InvokeAgentCommand.execute(io, coder, "ghost go") - - io.tool_error.assert_called() - assert "unknown" in io.tool_error.call_args[0][0] - - @pytest.mark.asyncio - async def test_runtime_error_shown_as_error(self): - """RuntimeError from service shown via io.tool_error.""" - from cecli.commands.invoke_agent import InvokeAgentCommand - - io = MagicMock() - coder = MagicMock() - - with patch("cecli.helpers.agents.service.AgentService") as MockSvc: - mock_instance = MagicMock() - mock_instance.invoke = AsyncMock(side_effect=RuntimeError("max reached")) - MockSvc.get_instance.return_value = mock_instance - - await InvokeAgentCommand.execute(io, coder, "reviewer go") - - io.tool_error.assert_called() - assert "max reached" in io.tool_error.call_args[0][0] - - @pytest.mark.asyncio - async def test_summary_output_on_completion(self): - """Successful completion shows summary via io.tool_output.""" - from cecli.commands.invoke_agent import InvokeAgentCommand - - io = MagicMock() - coder = MagicMock() - - with patch("cecli.helpers.agents.service.AgentService") as MockSvc: - mock_instance = MagicMock() - mock_instance.invoke = AsyncMock(return_value="task done") - MockSvc.get_instance.return_value = mock_instance - - with patch("cecli.helpers.conversation.service.ConversationService") as MockCS: - mock_manager = MagicMock() - MockCS.get_manager.return_value = mock_manager - - await InvokeAgentCommand.execute(io, coder, "reviewer do it") - - io.tool_output.assert_called_once() - assert "task done" in io.tool_output.call_args[0][0] - - class TestSpawnAgentCommand: """Tests for SpawnAgentCommand.""" @@ -143,7 +36,7 @@ async def test_valid_name_calls_spawn(self): await SpawnAgentCommand.execute(io, coder, "reviewer") 
- mock_instance.spawn.assert_called_once_with("reviewer") + mock_instance.spawn.assert_called_once_with("reviewer", None, parent=coder) io.tool_output.assert_called_once() assert "spawned" in io.tool_output.call_args[0][0] diff --git a/tests/subagents/test_delegate.py b/tests/subagents/test_delegate.py index 2ec5cc23d4c..97cb4ec9cf2 100644 --- a/tests/subagents/test_delegate.py +++ b/tests/subagents/test_delegate.py @@ -38,8 +38,8 @@ async def test_both_empty_returns_name_error(self): assert "name" in result @pytest.mark.asyncio - async def test_valid_delegate_calls_invoke(self): - """Valid params call AgentService.invoke with correct args.""" + async def test_valid_delegate_calls_spawn(self): + """Valid params call AgentService.spawn with correct args.""" from cecli.tools.delegate import Tool mock_coder = MagicMock() @@ -47,7 +47,10 @@ async def test_valid_delegate_calls_invoke(self): with patch("cecli.helpers.agents.service.AgentService") as MockService: mock_instance = MagicMock() - mock_instance.invoke = AsyncMock(return_value="review summary") + # spawn returns (new_coder, info); info.coder.uuid is used in output + mock_info = MagicMock() + mock_info.coder.uuid = "child-uuid-123" + mock_instance.spawn = AsyncMock(return_value=(MagicMock(), mock_info)) MockService.get_instance.return_value = mock_instance result = await Tool.execute( @@ -55,49 +58,65 @@ async def test_valid_delegate_calls_invoke(self): ) MockService.get_instance.assert_called_once_with(mock_coder) - mock_instance.invoke.assert_called_once_with("reviewer", "review this", blocking=True) - assert "review summary" in result + mock_instance.spawn.assert_called_once_with( + "reviewer", "review this", parent=mock_coder + ) + assert "agent started with id" in result + assert "child-uuid-123" in result - @pytest.mark.asyncio - async def test_delegate_no_summary(self): - """When invoke returns None, returns appropriate message.""" + async def test_delegate_multiple_delegations(self): + """Multiple 
delegations show correct dispatch count.""" from cecli.tools.delegate import Tool mock_coder = MagicMock() + mock_coder.uuid = "parent-uuid" + with patch("cecli.helpers.agents.service.AgentService") as MockService: mock_instance = MagicMock() - mock_instance.invoke = AsyncMock(return_value=None) + + async def spawn_side_effect(name, prompt, parent=None): + mock_info = MagicMock() + mock_info.coder.uuid = f"{name}-uuid" + return MagicMock(), mock_info + + mock_instance.spawn = AsyncMock(side_effect=spawn_side_effect) MockService.get_instance.return_value = mock_instance result = await Tool.execute( - mock_coder, delegations=[{"name": "tester", "prompt": "test"}] + mock_coder, + delegations=[ + {"name": "agent1", "prompt": "task1"}, + {"name": "agent2", "prompt": "task2"}, + ], ) - assert "completed (no summary)" in result + + assert "2/2 dispatched" in result + assert "agent1" in result + assert "agent2" in result @pytest.mark.asyncio - async def test_delegate_value_error_returns_error_string(self): - """ValueError from service returns error string.""" + async def test_delegate_spawn_error_returns_error_string(self): + """Error from spawn returns error string.""" from cecli.tools.delegate import Tool mock_coder = MagicMock() with patch("cecli.helpers.agents.service.AgentService") as MockService: mock_instance = MagicMock() - mock_instance.invoke = AsyncMock(side_effect=ValueError("unknown agent")) + mock_instance.spawn = AsyncMock(side_effect=ValueError("unknown agent")) MockService.get_instance.return_value = mock_instance result = await Tool.execute(mock_coder, delegations=[{"name": "ghost", "prompt": "x"}]) assert "failed" in result assert "unknown agent" in result - @pytest.mark.asyncio async def test_delegate_runtime_error_returns_error_string(self): - """RuntimeError from service returns error string.""" + """RuntimeError from spawn returns error string.""" from cecli.tools.delegate import Tool mock_coder = MagicMock() with 
patch("cecli.helpers.agents.service.AgentService") as MockService: mock_instance = MagicMock() - mock_instance.invoke = AsyncMock(side_effect=RuntimeError("max reached")) + mock_instance.spawn = AsyncMock(side_effect=RuntimeError("max reached")) MockService.get_instance.return_value = mock_instance result = await Tool.execute( @@ -106,7 +125,6 @@ async def test_delegate_runtime_error_returns_error_string(self): assert "failed" in result assert "max reached" in result - @pytest.mark.asyncio async def test_unexpected_exception_caught(self): """Any other exception returns error string (doesn't propagate).""" from cecli.tools.delegate import Tool @@ -114,11 +132,11 @@ async def test_unexpected_exception_caught(self): mock_coder = MagicMock() with patch("cecli.helpers.agents.service.AgentService") as MockService: mock_instance = MagicMock() - mock_instance.invoke = AsyncMock(side_effect=Exception("unexpected")) + mock_instance.spawn = AsyncMock(side_effect=Exception("unexpected")) MockService.get_instance.return_value = mock_instance result = await Tool.execute( mock_coder, delegations=[{"name": "reviewer", "prompt": "x"}] ) - assert "failed with unexpected error" in result + assert "failed" in result assert "unexpected" in result diff --git a/tests/subagents/test_finished.py b/tests/subagents/test_finished.py index ce1137f0a8f..2941a374745 100644 --- a/tests/subagents/test_finished.py +++ b/tests/subagents/test_finished.py @@ -8,12 +8,12 @@ class TestFinishedTool: - """Tests for the Finished tool sub-agent behavior.""" + """Tests for the Yield tool sub-agent behavior.""" @pytest.mark.asyncio async def test_sets_agent_finished_on_coder(self): """Sets coder.agent_finished = True.""" - from cecli.tools.finished import Tool + from cecli.tools._yield import Tool mock_coder = MagicMock() mock_coder.parent_uuid = "" @@ -27,7 +27,7 @@ async def test_sets_agent_finished_on_coder(self): async def test_sub_agent_with_summary_updates_info(self): """Sub-agent with summary updates 
SubAgentInfo.summary and status.""" from cecli.helpers.agents.service import AgentService, SubAgentStatus - from cecli.tools.finished import Tool + from cecli.tools._yield import Tool mock_coder = MagicMock() mock_coder.uuid = "sub-uuid" @@ -51,7 +51,7 @@ async def test_sub_agent_with_summary_updates_info(self): @pytest.mark.asyncio async def test_sub_agent_without_summary(self): """Sub-agent without summary kwarg doesn't crash.""" - from cecli.tools.finished import Tool + from cecli.tools._yield import Tool mock_coder = MagicMock() mock_coder.uuid = "sub-uuid" @@ -59,25 +59,25 @@ async def test_sub_agent_without_summary(self): mock_coder.files_edited_by_tools = set() result = await Tool.execute(mock_coder) - assert result == "Task Finished!" + assert result == "Yielded." @pytest.mark.asyncio async def test_non_sub_agent_skips_lookup(self): """Coder without parent_uuid skips sub-agent lookup.""" - from cecli.tools.finished import Tool + from cecli.tools._yield import Tool mock_coder = MagicMock() mock_coder.parent_uuid = "" mock_coder.files_edited_by_tools = set() result = await Tool.execute(mock_coder) - assert result == "Task Finished!" + assert result == "Yielded." 
@pytest.mark.asyncio async def test_unknown_parent_uuid_caught_gracefully(self): """Sub-agent with parent not in _instances is caught silently.""" from cecli.helpers.agents.service import AgentService - from cecli.tools.finished import Tool + from cecli.tools._yield import Tool mock_coder = MagicMock() mock_coder.uuid = "sub-uuid" @@ -88,10 +88,9 @@ async def test_unknown_parent_uuid_caught_gracefully(self): result = await Tool.execute(mock_coder, summary="done") assert "Summary: done" in result - @pytest.mark.asyncio async def test_returns_summary_in_response(self): """When summary provided, response includes it.""" - from cecli.tools.finished import Tool + from cecli.tools._yield import Tool mock_coder = MagicMock() mock_coder.parent_uuid = "" @@ -103,7 +102,7 @@ async def test_returns_summary_in_response(self): @pytest.mark.asyncio async def test_coder_is_none_returns_error(self): """When coder is None, returns error string.""" - from cecli.tools.finished import Tool + from cecli.tools._yield import Tool result = await Tool.execute(None) assert "Error" in result diff --git a/tests/subagents/test_service.py b/tests/subagents/test_service.py index 4d3ba4c555c..e866603066d 100644 --- a/tests/subagents/test_service.py +++ b/tests/subagents/test_service.py @@ -471,71 +471,82 @@ class TestWait: """AgentService.wait() behavior.""" @pytest.mark.asyncio - async def test_unknown_name_raises(self, service): - """Unknown name raises ValueError.""" - with pytest.raises(ValueError, match="No sub-agent named"): - await service.wait("ghost") + async def test_no_children_returns_empty_list(self, service): + """Parent with no children returns empty list.""" + parent_coder = MagicMock() + parent_coder.uuid = "parent-uuid" + result = await service.wait(parent_coder) + assert result == [] @pytest.mark.asyncio - async def test_wait_finished_returns_summary(self, service): - """Already FINISHED returns summary immediately.""" + async def test_wait_finished_returns_summary_list(self, 
service): + """Already FINISHED returns summary in a list.""" + parent_coder = MagicMock() + parent_coder.uuid = "parent-uuid" info = SubAgentInfo( name="agent", coder=MagicMock(), - parent_uuid="parent", + parent_uuid="parent-uuid", status=SubAgentStatus.FINISHED, summary="done", ) + info.generate_task = None service.sub_agents["agent"] = info service._sub_agent_order.append("agent") - result = await service.wait("agent") - assert result == "done" + result = await service.wait(parent_coder) + assert result == ["done"] @pytest.mark.asyncio - async def test_wait_error_raises(self, service): - """ERROR status raises RuntimeError.""" + async def test_wait_error_returns_none_summary(self, service): + """ERROR status returns list containing None summary.""" + parent_coder = MagicMock() + parent_coder.uuid = "parent-uuid" info = SubAgentInfo( name="agent", coder=MagicMock(), - parent_uuid="parent", + parent_uuid="parent-uuid", status=SubAgentStatus.ERROR, error="something broke", + summary=None, ) + info.generate_task = None service.sub_agents["agent"] = info service._sub_agent_order.append("agent") - with pytest.raises(RuntimeError, match="something broke"): - await service.wait("agent") + result = await service.wait(parent_coder) + assert result == [None] @pytest.mark.asyncio async def test_wait_polls_until_finished(self, service): - """Polls until status is FINISHED then returns summary.""" + """Polls via generate_task until FINISHED then returns summary.""" + import asyncio + + parent_coder = MagicMock() + parent_coder.uuid = "parent-uuid" + info = SubAgentInfo( name="agent", coder=MagicMock(), - parent_uuid="parent", + parent_uuid="parent-uuid", status=SubAgentStatus.CREATED, ) - service.sub_agents["agent"] = info - service._sub_agent_order.append("agent") - # Simulate the sub-agent finishing after a brief delay async def finish_later(): - import asyncio - - await asyncio.sleep(0.1) + await asyncio.sleep(0.05) info.status = SubAgentStatus.FINISHED info.summary = 
"completed" - import asyncio + # Create a generate_task that completes when finish_later runs + async def gen_task(): + await finish_later() - await asyncio.gather( - service.wait("agent"), - finish_later(), - ) + info.generate_task = asyncio.create_task(gen_task()) + service.sub_agents["agent"] = info + service._sub_agent_order.append("agent") - assert info.summary == "completed" + result = await service.wait(parent_coder) + assert result == ["completed"] # ================================================================== # diff --git a/tests/tools/test_registry.py b/tests/tools/test_registry.py index a0a9b34be34..7f540e38bde 100644 --- a/tests/tools/test_registry.py +++ b/tests/tools/test_registry.py @@ -28,7 +28,7 @@ def test_registry_initialization(self): assert len(tools) > 0, "Registry should have tools after initialization" # Check that essential tools are registered - essential_tools = {"contextmanager", "edittext", "finished"} + essential_tools = {"contextmanager", "edittext", "yield"} for tool in essential_tools: assert tool in tools, f"Essential tool {tool} should be registered" @@ -54,7 +54,7 @@ def test_build_registry_empty_config(self): # Essential tools should always be included assert "contextmanager" in registry, "Essential tool should be included" assert "edittext" in registry, "Essential tool should be included" - assert "finished" in registry, "Essential tool should be included" + assert "yield" in registry, "Essential tool should be included" def test_build_registry_with_includelist(self): """Test filtering with tools_includelist""" @@ -65,7 +65,7 @@ def test_build_registry_with_includelist(self): assert len(registry) == 3, "Should include 2 from list + 1 essential" assert "contextmanager" in registry assert "edittext" in registry - assert "finished" in registry # Essential + assert "yield" in registry # Essential assert "command" not in registry, "Should not include tools not in includelist" def test_build_registry_with_excludelist(self): @@ 
-86,7 +86,7 @@ def test_build_registry_exclude_essential(self): # Essential tools should still be included despite excludelist assert "contextmanager" in registry, "Essential tool cannot be excluded" assert "edittext" in registry, "Essential tool cannot be excluded" - assert "finished" in registry, "Essential tool cannot be excluded" + assert "yield" in registry, "Essential tool cannot be excluded" assert "command" not in registry, "Non-essential tool should be excluded" def test_build_registry_combined_filters(self): @@ -98,10 +98,10 @@ def test_build_registry_combined_filters(self): registry = ToolRegistry.build_registry(config) # Should respect all filters - assert len(registry) == 4, "Should include exactly 4 tools (3 from list + finished)" + assert len(registry) == 4, "Should include exactly 4 tools (3 from list + yield)" assert "contextmanager" in registry assert "edittext" in registry - assert "finished" in registry + assert "yield" in registry assert "command" in registry assert "commandinteractive" not in registry @@ -117,7 +117,7 @@ def test_get_filtered_tools(self): assert len(tool_names) == 3 assert "contextmanager" in tool_names assert "edittext" in tool_names - assert "finished" in tool_names # Essential tool always included + assert "yield" in tool_names # Essential tool always included def test_legacy_config_names(self): """Test backward compatibility with legacy config names (whitelist/blacklist)""" @@ -148,7 +148,7 @@ def test_config_precedence(self): "command" not in registry ), "Should not use tools_whitelist when tools_includelist present" assert "commandinteractive" not in registry, "Should use tools_excludelist" - assert "finished" in registry, "Essential tool cannot be excluded" + assert "yield" in registry, "Essential tool cannot be excluded" def test_registry_consistency(self): """Test that registry methods return consistent results""" From fb996af203263ff5b1b4eefa7214ee5946ecc90c Mon Sep 17 00:00:00 2001 From: Your Name Date: Sat, 30 May 
2026 13:29:22 -0400 Subject: [PATCH 22/37] Fix yield tool not finishing prematurely --- cecli/tools/_yield.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/cecli/tools/_yield.py b/cecli/tools/_yield.py index c1372c1bdca..1e6bb12b47c 100644 --- a/cecli/tools/_yield.py +++ b/cecli/tools/_yield.py @@ -104,7 +104,12 @@ async def execute(cls, coder, **kwargs): except asyncio.CancelledError: pass - # Fall through to the normal finishing flow below + # Don't mark as finished — the coder should review sub-agent + # outputs and decide how to proceed + return ( + "Sub-agents have finished. Please examine their output above " + "in order to decide how you will proceed." + ) except Exception as e: logger.warning("Error awaiting child sub-agents before yield: %s", e) From 99c62a08ead1a2a846fa51960801bcb5d4403ca3 Mon Sep 17 00:00:00 2001 From: Your Name Date: Sat, 30 May 2026 15:06:19 -0400 Subject: [PATCH 23/37] Allow automatic reaping of sub agents --- cecli/coders/agent_coder.py | 8 ++ cecli/commands/spawn_agent.py | 2 +- cecli/helpers/agents/config.py | 8 +- cecli/helpers/agents/service.py | 167 ++++++++++++++++++++++--- cecli/tools/_yield.py | 12 +- cecli/website/docs/config/subagents.md | 23 +++- tests/subagents/test_commands.py | 2 +- 7 files changed, 201 insertions(+), 21 deletions(-) diff --git a/cecli/coders/agent_coder.py b/cecli/coders/agent_coder.py index 822e640d0b5..ac384c4c39a 100644 --- a/cecli/coders/agent_coder.py +++ b/cecli/coders/agent_coder.py @@ -875,6 +875,13 @@ async def reply_completed(self): content = self.partial_response_content tool_calls_found = bool(self.partial_response_tool_calls) + # Reap all finished sub-agents with auto_reap enabled + try: + service = AgentService.get_instance(self) + await service.reap_all_finished_agents(parent=service.get_parent(self)) + except Exception: + logger.warning("Failed to reap finished sub-agents", exc_info=True) + # 1. 
Handle Tool Execution Follow-up (Reflection) if self.agent_finished: self.tool_usage_history = [] @@ -882,6 +889,7 @@ async def reply_completed(self): self.reflected_message = None if self.files_edited_by_tools: _ = await self.auto_commit(self.files_edited_by_tools) + return False # 2. Check for unfinished and recently finished background commands diff --git a/cecli/commands/spawn_agent.py b/cecli/commands/spawn_agent.py index 33ffe3c7953..de77578713f 100644 --- a/cecli/commands/spawn_agent.py +++ b/cecli/commands/spawn_agent.py @@ -27,7 +27,7 @@ async def execute(cls, io, coder, args, **kwargs): try: agent_service = AgentService.get_instance(coder) - await agent_service.spawn(name, prompt, parent=coder) + await agent_service.spawn(name, prompt, parent=coder, auto_reap=False) if coder.tui and coder.tui(): switch_key = coder.tui().get_keys_for("next_agent") diff --git a/cecli/helpers/agents/config.py b/cecli/helpers/agents/config.py index d054c823773..b99ab76bf15 100644 --- a/cecli/helpers/agents/config.py +++ b/cecli/helpers/agents/config.py @@ -19,6 +19,7 @@ class SubAgentConfig: prompt: str = "" model: Optional[str] = None hooks: Dict[str, Any] = field(default_factory=dict) + auto_reap: Optional[bool] = None metadata: Dict[str, Any] = field(default_factory=dict) @@ -71,13 +72,18 @@ def parse_subagent_file(file_path: str) -> Optional[SubAgentConfig]: hooks_data = frontmatter_data.get("hooks", {}) if not isinstance(hooks_data, dict): hooks_data = {} - metadata = {k: v for k, v in frontmatter_data.items() if k not in ("name", "model", "hooks")} + metadata = { + k: v + for k, v in frontmatter_data.items() + if k not in ("name", "model", "hooks", "auto_reap") + } config = SubAgentConfig( name=name, prompt=prompt, model=frontmatter_data.get("model"), hooks=hooks_data, + auto_reap=frontmatter_data.get("auto_reap"), metadata=metadata, ) diff --git a/cecli/helpers/agents/service.py b/cecli/helpers/agents/service.py index 34fe637901b..6ec7d6e0dd2 100644 --- 
a/cecli/helpers/agents/service.py +++ b/cecli/helpers/agents/service.py @@ -47,6 +47,7 @@ class SubAgentInfo: generate_task: Optional[asyncio.Task] = ( None # Track the generate() task for cancellation/monitoring ) + auto_reap: bool = True # If True, agent may be automatically reaped when FINISHED class AgentService: @@ -237,13 +238,84 @@ def _get_tui(coder: Any) -> Any: # ------------------------------------------------------------------ # def _reap_finished_agent(self) -> None: - """Remove the oldest FINISHED sub-agent (lazy reap).""" + """Remove the oldest FINISHED or ERROR sub-agent (lazy reap). + + Only reaps sub-agents whose descendants (children, grandchildren, etc.) + have all also finished. This prevents reaping a sub-agent while it + still has running descendant tasks that its ``generate()`` loop may + need to process. + """ + # Build parent → children mapping + parent_to_children: Dict[str, List[SubAgentInfo]] = {} + for info in self.sub_agents.values(): + parent_to_children.setdefault(info.parent_uuid, []).append(info) + + def _has_unfinished_descendants(agent_uuid: str) -> bool: + """Return True if *agent_uuid* has any non-FINISHED/non-ERROR descendant.""" + for child in parent_to_children.get(agent_uuid, []): + if child.status not in (SubAgentStatus.FINISHED, SubAgentStatus.ERROR): + return True + if _has_unfinished_descendants(child.coder.uuid): + return True + return False + for coder_uuid in list(self._sub_agent_order): info = self.sub_agents.get(coder_uuid) - if info and info.status == SubAgentStatus.FINISHED: + if ( + info + and info.status in (SubAgentStatus.FINISHED, SubAgentStatus.ERROR) + and info.auto_reap + and (info.generate_task is None or info.generate_task.done()) + and not _has_unfinished_descendants(coder_uuid) + ): self._cleanup_sub_agent(coder_uuid) return + async def reap_all_finished_agents(self, parent: Any = None) -> None: + """Remove all FINISHED or ERROR sub-agents that have ``auto_reap`` enabled. 
+ + Builds a parent→children mapping of all sub-agents and only reaps + finished sub-agents whose descendants (children, grandchildren, etc.) + have all also finished. This prevents reaping a sub-agent while it + still has running descendant tasks that its ``generate()`` loop may + need to process. Acquires the spawn lock for the given *parent* + (or ``self.coder`` if omitted) to serialise with concurrent + ``_create_sub_agent_coder()`` operations under the same parent. + + Args: + parent: Optional coder instance whose spawn lock will be acquired. + If provided, reaping is serialised against spawns under this + specific parent. Defaults to ``self.coder``. + """ + # Build parent → children mapping + parent_to_children: Dict[str, List[SubAgentInfo]] = {} + for info in self.sub_agents.values(): + parent_to_children.setdefault(info.parent_uuid, []).append(info) + + def _has_unfinished_descendants(agent_uuid: str) -> bool: + """Return True if *agent_uuid* has any non-FINISHED/non-ERROR descendant.""" + for child in parent_to_children.get(agent_uuid, []): + if child.status not in (SubAgentStatus.FINISHED, SubAgentStatus.ERROR): + return True + if _has_unfinished_descendants(child.coder.uuid): + return True + return False + + # Acquire the spawn lock for the primary coder to serialise with + # concurrent spawn operations that also hold this lock. 
+ parent_coder = parent if parent is not None else self.coder + async with self._get_lock(self._spawn_locks, parent_coder.uuid): + for coder_uuid in list(self._sub_agent_order): + info = self.sub_agents.get(coder_uuid) + if ( + info + and info.status in (SubAgentStatus.FINISHED, SubAgentStatus.ERROR) + and info.auto_reap + and (info.generate_task is None or info.generate_task.done()) + and not _has_unfinished_descendants(coder_uuid) + ): + self._cleanup_sub_agent(coder_uuid) + def _cleanup_sub_agent(self, agent_uuid: str) -> None: """Remove agent instance from tracking and notify TUI if possible.""" info = self.sub_agents.pop(agent_uuid, None) @@ -301,7 +373,9 @@ def _check_max_sub_agents(self) -> None: Raises RuntimeError if no finished agents can be reaped. """ active_count = sum( - 1 for info in self.sub_agents.values() if info.status != SubAgentStatus.FINISHED + 1 + for info in self.sub_agents.values() + if info.status not in (SubAgentStatus.FINISHED, SubAgentStatus.ERROR) ) if active_count < self.max_sub_agents: return @@ -311,7 +385,9 @@ def _check_max_sub_agents(self) -> None: # Recalculate active count after reaping active_count = sum( - 1 for info in self.sub_agents.values() if info.status != SubAgentStatus.FINISHED + 1 + for info in self.sub_agents.values() + if info.status not in (SubAgentStatus.FINISHED, SubAgentStatus.ERROR) ) if active_count >= self.max_sub_agents: raise RuntimeError( @@ -320,7 +396,7 @@ def _check_max_sub_agents(self) -> None: ) async def _create_sub_agent_coder( - self, name: str, parent: Any = None + self, name: str, parent: Any = None, auto_reap: Optional[bool] = None ) -> Tuple[Any, SubAgentInfo]: """Create a sub-agent coder, register it, and set up its container and prompt. @@ -329,10 +405,13 @@ async def _create_sub_agent_coder( Args: name: Name of the sub-agent to create. - parent: Optional coder instance to use as the parent. 
- If provided, the new sub-agent's ``parent_uuid`` will be - ``parent.uuid`` instead of ``self.coder.uuid``, enabling - nested sub-agent hierarchies. Defaults to ``self.coder``. + parent: Optional coder instance to use as the parent for nested + sub-agent hierarchies. If provided, the new sub-agent's + ``parent_uuid`` will be ``parent.uuid`` instead of + ``self.coder.uuid``. Defaults to ``self.coder``. + auto_reap: If True, agent may be automatically reaped when FINISHED. + If not set, defers to the sub-agent config's ``auto_reap`` + value, then defaults to ``True``. Returns: Tuple of ``(new_coder, info)``. @@ -347,6 +426,12 @@ async def _create_sub_agent_coder( f"Unknown sub-agent '{name}'. " f"Available: {list(self._global_registry.keys())}" ) + # Resolve auto_reap: None means defer to sub-agent config, then default to True + if auto_reap is None: + auto_reap = getattr(config, "auto_reap", None) + if auto_reap is None: + auto_reap = True + # Critical section: max-sub-agent check and registration must be atomic # to prevent TOCTOU race when multiple spawns fire concurrently. # Coder.create() is called *outside* the lock to avoid holding an @@ -393,8 +478,8 @@ async def _create_sub_agent_coder( coder=new_coder, parent_uuid=parent_coder.uuid, status=SubAgentStatus.CREATED, + auto_reap=auto_reap, ) - self.sub_agents[new_coder.uuid] = info self._sub_agent_order.append(new_coder.uuid) @@ -583,7 +668,12 @@ async def _inject_sub_agent_result(self, info: SubAgentInfo) -> None: ) async def invoke( - self, name: str, prompt: str, blocking: bool = True, parent: Any = None + self, + name: str, + prompt: str, + blocking: bool = True, + parent: Any = None, + auto_reap: Optional[bool] = None, ) -> Optional[str]: """Invoke a sub-agent by name with the given prompt (blocking by default). @@ -594,7 +684,9 @@ async def invoke( parent: Optional coder instance to use as the parent for nested sub-agent hierarchies. Defaults to ``self.coder``. 
""" - new_coder, info = await self._create_sub_agent_coder(name, parent) + new_coder, info = await self._create_sub_agent_coder( + name, auto_reap=auto_reap, parent=parent + ) if not blocking: return None @@ -605,7 +697,11 @@ async def invoke( return info.summary async def spawn( - self, name: str, prompt: Optional[str] = None, parent: Any = None + self, + name: str, + prompt: Optional[str] = None, + parent: Any = None, + auto_reap: Optional[bool] = None, ) -> Tuple[Any, SubAgentInfo]: """Spawn a sub-agent (non-blocking) that waits for user input. @@ -620,7 +716,9 @@ async def spawn( Tuple of ``(new_coder, info)`` so callers can further interact with the sub-agent (e.g. call ``start_generate_task`` later). """ - new_coder, info = await self._create_sub_agent_coder(name, parent) + new_coder, info = await self._create_sub_agent_coder( + name, auto_reap=auto_reap, parent=parent + ) if prompt: self.start_generate_task(info, prompt) return new_coder, info @@ -689,6 +787,47 @@ def get_children(self, coder_or_uuid: Any) -> List[SubAgentInfo]: return [info for info in self.sub_agents.values() if info.parent_uuid == uid] + def get_parent(self, coder_or_uuid: Any) -> Any: + """Return the parent coder for the given coder or UUID. + + If the given coder is the primary coder (``self.coder``), returns itself. + Otherwise, looks up the sub-agent's parent in the tracking data and + returns that parent's coder instance. + + This is used for lock key resolution when reaping from a sub-agent + context — the spawn lock should be acquired with the parent's UUID + to properly serialise with concurrent spawn operations under that + same parent. + + Args: + coder_or_uuid: A coder instance (with ``.uuid``) or a UUID string. + + Returns: + The parent coder instance, or ``self.coder`` if the given coder is + the primary coder or has no known parent. 
+ """ + if hasattr(coder_or_uuid, "uuid"): + uid = str(coder_or_uuid.uuid) + else: + uid = str(coder_or_uuid) + + # Primary coder returns itself + if uid == self.coder.uuid: + return self.coder + + # Look up the sub-agent to find its parent_uuid + info = self.sub_agents.get(uid) + if info and info.parent_uuid: + # Parent is the primary coder + if info.parent_uuid == self.coder.uuid: + return self.coder + # Parent is another sub-agent — look up its coder + parent_info = self.sub_agents.get(info.parent_uuid) + if parent_info: + return parent_info.coder + + return self.coder + # ------------------------------------------------------------------ # # Foreground agent tracking # ------------------------------------------------------------------ # diff --git a/cecli/tools/_yield.py b/cecli/tools/_yield.py index 1e6bb12b47c..a95b7343118 100644 --- a/cecli/tools/_yield.py +++ b/cecli/tools/_yield.py @@ -40,13 +40,13 @@ async def execute(cls, coder, **kwargs): This gives the LLM explicit control over when it can stop looping """ + from cecli.helpers.agents.service import AgentService + cls.clear_invocation_cache() if coder: # Check for active child sub-agents and await their tasks before finishing try: - from cecli.helpers.agents.service import AgentService - agent_service = AgentService.get_instance(coder) children = agent_service.get_children(coder) active_tasks = [ @@ -113,6 +113,13 @@ async def execute(cls, coder, **kwargs): except Exception as e: logger.warning("Error awaiting child sub-agents before yield: %s", e) + # Reap all finished sub-agents with auto_reap enabled + try: + service = AgentService.get_instance(coder) + await service.reap_all_finished_agents(parent=service.get_parent(coder)) + except Exception: + logger.warning("Failed to reap finished sub-agents", exc_info=True) + coder.agent_finished = True # If this is a sub-agent, capture the summary for the parent @@ -120,7 +127,6 @@ async def execute(cls, coder, **kwargs): parent_uuid = coder.parent_uuid if 
parent_uuid: try: - from cecli.helpers.agents.service import AgentService AgentService.mark_sub_agent_finished( sub_coder_uuid=coder.uuid, diff --git a/cecli/website/docs/config/subagents.md b/cecli/website/docs/config/subagents.md index ac223f24830..da0585a527b 100644 --- a/cecli/website/docs/config/subagents.md +++ b/cecli/website/docs/config/subagents.md @@ -43,6 +43,7 @@ and suggestions for improvement. | `name` | Yes | Unique name used to reference the sub-agent in commands and the Delegate tool | | `model` | No | Model override for this sub-agent. If omitted, inherits the parent agent's model | | `hooks` | No | Per-agent hooks configuration (see [Hooks](/config/hooks) for syntax) | +| `auto_reap` | No | Controls whether this sub-agent is automatically reaped when the limit is reached. Defaults to `true` if omitted | #### System Prompt @@ -61,6 +62,8 @@ agent-config: - "~/team-agents" # Custom path for shared agent definitions ``` + + ## Usage ### Available Commands @@ -142,9 +145,27 @@ The `max_sub_agents` setting (default: 3) limits how many concurrent sub-agents When the limit is reached: -- If any sub-agents have **finished**, the oldest finished one is automatically reaped to make room +- If any sub-agents have **finished** and have `auto_reap: true` (the default), the oldest finished one is automatically reaped to make room - If all sub-agents are still **running**, a `RuntimeError` is raised. You must wait for one to finish or use `/reap-agent` to free resources. +#### Auto-Reap + +The `auto_reap` field in the sub-agent definition's YAML front matter controls whether a finished sub-agent is automatically reaped when the maximum sub-agent limit is reached. When `true` (the default), the oldest finished sub-agent will be removed to make room for new ones. + +```markdown +--- +name: reviewer +model: deepseek/deepseek-v4-pro +auto_reap: false # Prevent automatic reaping of this agent +--- +You are a code review specialist. 
+``` + +- **`/spawn-agent`** always spawns sub-agents with `auto_reap=false` — since these agents are created manually by the user, they should persist until explicitly reaped with `/reap-agent`. +- **`Delegate` tool** uses the sub-agent's configured `auto_reap` value from its definition. If not set in the `.md` front matter, it defaults to `true`. + +Sub-agents with `auto_reap: true` that finish their work are candidates for automatic cleanup when the agent limit is reached. Sub-agents with `auto_reap: false` are never automatically reaped and must be cleaned up manually. + ### Cleanup - **Normal completion**: A sub-agent calls `Yield(summary="...")` which marks it as finished. Its container remains visible but its resources are eligible for lazy cleanup. diff --git a/tests/subagents/test_commands.py b/tests/subagents/test_commands.py index c5210736a0b..4cb340c1c84 100644 --- a/tests/subagents/test_commands.py +++ b/tests/subagents/test_commands.py @@ -36,7 +36,7 @@ async def test_valid_name_calls_spawn(self): await SpawnAgentCommand.execute(io, coder, "reviewer") - mock_instance.spawn.assert_called_once_with("reviewer", None, parent=coder) + mock_instance.spawn.assert_called_once_with("reviewer", None, parent=coder, auto_reap=False) io.tool_output.assert_called_once() assert "spawned" in io.tool_output.call_args[0][0] From 7df28767a0a122d29ccb840be4232b7cec759f21 Mon Sep 17 00:00:00 2001 From: Your Name Date: Sun, 31 May 2026 15:44:59 -0400 Subject: [PATCH 24/37] Update tool parsing referencing PR #536 but a bit more idiomatically laid out --- cecli/coders/agent_coder.py | 41 ++- cecli/coders/base_coder.py | 14 + cecli/helpers/responses.py | 367 +++++++++++++++++++++++++- cecli/tools/delegate.py | 1 + cecli/tools/edit_text.py | 1 + cecli/tools/explore_code.py | 1 + cecli/tools/grep.py | 1 + cecli/tools/read_range.py | 1 + cecli/tools/update_todo_list.py | 1 + cecli/tools/utils/base_tool.py | 9 +- cecli/tools/utils/helpers.py | 45 ++++ 
tests/tools/test_extractions.py | 406 +++++++++++++++++++++++++++++ tests/tools/test_tool_arguments.py | 298 +++++++++++++++++++++ 13 files changed, 1156 insertions(+), 30 deletions(-) create mode 100644 tests/tools/test_extractions.py create mode 100644 tests/tools/test_tool_arguments.py diff --git a/cecli/coders/agent_coder.py b/cecli/coders/agent_coder.py index ba5b88b515b..8d77628ce25 100644 --- a/cecli/coders/agent_coder.py +++ b/cecli/coders/agent_coder.py @@ -12,7 +12,6 @@ from datetime import datetime from pathlib import Path -from cecli import utils from cecli.change_tracker import ChangeTracker from cecli.helpers import nested, responses from cecli.helpers.agents.service import AgentService @@ -732,25 +731,23 @@ async def _execute_local_tools(self, tool_calls_list): continue if args_string: - json_chunks = utils.split_concatenated_json(args_string) - for chunk in json_chunks: - try: - parsed_args_list.append(json.loads(chunk)) - except json.JSONDecodeError as e: - self.model_kwargs = {} - self.io.tool_warning( - f"Malformed JSON arguments in tool {tool_name}: {chunk}" - ) - tool_responses.append( - { - "role": "tool", - "tool_call_id": tool_call.id, - "content": ( - f"Malformed JSON arguments in tool {tool_name}: {str(e)}" - ), - } - ) - continue + parsed = responses.parse_tool_arguments(args_string) + if isinstance(parsed, dict) and "@error" in parsed: + self.io.tool_warning( + f"Malformed JSON arguments in tool {tool_name}: {parsed['@error']}" + ) + tool_responses.append( + { + "role": "tool", + "tool_call_id": tool_call.id, + "content": ( + f"Malformed JSON arguments in tool {tool_name}: {parsed['@error']}" + ), + } + ) + continue + parsed_args_list = [parsed] + if not parsed_args_list and not args_string: parsed_args_list.append({}) all_results_content = [] @@ -846,7 +843,9 @@ async def _execute_mcp_tools(self, server, tool_calls): for tool_call in tool_calls: # Use existing _execute_mcp_tool logic result = await self._execute_mcp_tool( - server, 
tool_call.function.name, json.loads(tool_call.function.arguments) + server, + tool_call.function.name, + responses.parse_tool_arguments(tool_call.function.arguments), ) responses.append( { diff --git a/cecli/coders/base_coder.py b/cecli/coders/base_coder.py index ff7a92829b6..b6dc6d6f69f 100755 --- a/cecli/coders/base_coder.py +++ b/cecli/coders/base_coder.py @@ -2655,6 +2655,13 @@ def _expand_concatenated_json(self, tool_calls): expanded_tool_calls.append(tool_call) continue + merged = responses.merge_glued_json_objects(json_chunks) + if merged is not None: + new_tool_call = copy_tool_call(tool_call) + new_tool_call.function.arguments = json.dumps(merged) + expanded_tool_calls.append(new_tool_call) + continue + # We have concatenated JSON, so expand it into multiple tool calls. for i, chunk in enumerate(json_chunks): if not chunk.strip(): @@ -3638,12 +3645,19 @@ def consolidate_chunks(self): extracted_calls = responses.extract_tools_from_content_json( self.partial_response_content ) + if not extracted_calls: extracted_calls = responses.extract_tools_from_content_xml( self.partial_response_content ) + if not extracted_calls: + extracted_calls = responses.extract_tools_from_pseudo_json( + self.partial_response_content + ) + if extracted_calls: + self.tool_reflection = True self.partial_response_tool_calls = extracted_calls self.partial_response_consolidated = (response, func_err, content_err) diff --git a/cecli/helpers/responses.py b/cecli/helpers/responses.py index 6f4762e2a88..87d10a2daff 100644 --- a/cecli/helpers/responses.py +++ b/cecli/helpers/responses.py @@ -6,6 +6,7 @@ from litellm.types.utils import ChatCompletionMessageToolCall, Function from cecli import utils +from cecli.helpers import nested def preprocess_json(response: str) -> str: @@ -45,14 +46,21 @@ def extract_tools_from_content_json(content: str) -> Optional[List[ChatCompletio chunk_index += 1 try: json_obj = json.loads(chunk) - if isinstance(json_obj, dict) and "name" in json_obj and "arguments" 
in json_obj: + arg_keys = ["arguments", "parameters", "params"] + + if ( + isinstance(json_obj, dict) + and "name" in json_obj + and any(key in json_obj for key in arg_keys) + ): # Create a Pydantic model for the tool call + json_args = nested.getter(json_obj, arg_keys) function_obj = Function( name=json_obj["name"], arguments=( - json.dumps(json_obj["arguments"]) - if isinstance(json_obj["arguments"], (dict, list)) - else str(json_obj["arguments"]) + json.dumps(json_args) + if isinstance(json_args, (dict, list)) + else str(json_args) ), ) tool_call_obj = ChatCompletionMessageToolCall( @@ -63,13 +71,18 @@ def extract_tools_from_content_json(content: str) -> Optional[List[ChatCompletio extracted_calls.append(tool_call_obj) elif isinstance(json_obj, list): for item in json_obj: - if isinstance(item, dict) and "name" in item and "arguments" in item: + if ( + isinstance(item, dict) + and "name" in item + and any(key in item for key in arg_keys) + ): + item_args = nested.getter(item, arg_keys) function_obj = Function( name=item["name"], arguments=( - json.dumps(item["arguments"]) - if isinstance(item["arguments"], (dict, list)) - else str(item["arguments"]) + json.dumps(item_args) + if isinstance(item_args, (dict, list)) + else str(item_args) ), ) tool_call_obj = ChatCompletionMessageToolCall( @@ -132,6 +145,91 @@ def extract_tools_from_content_xml(content: str) -> Optional[List[ChatCompletion return None +def extract_tools_from_pseudo_json(content: str) -> Optional[List[ChatCompletionMessageToolCall]]: + """ + Extraction of tool calls from bracket format. + + Handles blocks shaped like: + [ToolName(arg1=value1, arg2=value2, ...)] + + Where values can be JSON arrays, objects, booleans, strings, or numbers. + The parser handles nested parentheses and commas inside JSON values. 
+ + Example: + [Local--ReadRange(show=[{"file_path": "agent.py", "start_text": "class A"}], verbose=true, mode="strict")] + """ + if not content or "[" not in content: + return None + + try: + extracted_calls = [] + + # Scan through content to find all [ToolName(...)] blocks + i = 0 + while i < len(content): + bracket_start = content.find("[", i) + if bracket_start == -1: + break + + # Find the opening paren after the bracket + paren_start = content.find("(", bracket_start) + if paren_start == -1: + i = bracket_start + 1 + continue + + tool_name = content[bracket_start + 1 : paren_start].strip() + if not tool_name or not re.match(r"^[a-zA-Z0-9_\\-]+$", tool_name): + i = paren_start + 1 + continue + + # Find matching closing paren tracking nesting depth + depth = 1 + paren_end = -1 + pos = paren_start + 1 + while pos < len(content) and depth > 0: + if content[pos] == "(": + depth += 1 + elif content[pos] == ")": + depth -= 1 + if depth == 0: + paren_end = pos + break + pos += 1 + + if paren_end == -1: + i = paren_start + 1 + continue + + # Expect "]" after ")" + if paren_end + 1 >= len(content) or content[paren_end + 1] != "]": + i = paren_end + 1 + continue + + # Extract the payload between the parentheses + payload = content[paren_start + 1 : paren_end] + + # Parse the arguments from the payload + args = _parse_bracket_arguments(payload) + + # Create a tool call object + function_obj = Function( + name=tool_name, + arguments=json.dumps(args), + ) + tool_call_obj = ChatCompletionMessageToolCall( + type="function", + function=function_obj, + id=f"bracket_call_{len(extracted_calls)}_{int(time.time())}", + ) + extracted_calls.append(tool_call_obj) + + i = paren_end + 2 # Skip past ")]" + + return extracted_calls if extracted_calls else None + except Exception: + return None + + def prefix_tool_name(server_name: str, tool_name: str) -> str: """ Prefix a tool name with the server name. 
@@ -243,3 +341,256 @@ def unprefix_tool_call(tool_call): result["function"]["name"] = unprefixed_name return server_name, result + + +def parse_tool_arguments(args_string: str) -> dict: + """Parse tool-call arguments, merging glued ``{…}{} {…}`` object fragments.""" + text = (args_string or "").strip() + if not text: + return {} + try: + parsed = json.loads(text) + if isinstance(parsed, dict): + return parsed + except json.JSONDecodeError: + pass + + parsed = try_parse_json_value(text) + if isinstance(parsed, dict): + return parsed + + chunks = utils.split_concatenated_json(text) + if len(chunks) <= 1: + if not chunks: + return {} + lone = try_parse_json_value(chunks[0]) + if isinstance(lone, dict): + return lone + try: + single = json.loads(chunks[0]) + except json.JSONDecodeError as err: + return {"@error": f"Malformed JSON arguments: {err}"} + return single if isinstance(single, dict) else {} + + merged = merge_glued_json_objects(chunks) + + if merged is not None: + return merged + + return { + "@error": "Could not merge glued JSON objects: argument fragments are not all JSON objects" + } + + +def merge_glued_json_objects(chunks: list[str]) -> dict | None: + """ + Merge consecutive JSON object strings from glued local-model tool args. + + Example: ``{"limit": 15}{}{"path": "."}`` → ``{"limit": 15, "path": "."}``. + Returns ``None`` when chunks are not all mergeable objects (caller may split). 
+ """ + merged: dict = {} + saw_non_empty = False + for chunk in chunks: + text = chunk.strip() + if not text: + continue + obj = try_parse_json_value(text) + if obj is None: + try: + obj = json.loads(text) + except json.JSONDecodeError: + return None + if isinstance(obj, list): + return None + if not isinstance(obj, dict): + return None + if obj: + merged.update(obj) + saw_non_empty = True + if saw_non_empty or merged == {}: + return merged + return None + + +def try_parse_json_value(text: str): + """Parse JSON text, including repairs for common local-model tool-arg quirks.""" + text = text.strip() + if not text: + return None + for candidate in (text, _repair_local_model_json_text(text)): + try: + return json.loads(candidate) + except json.JSONDecodeError: + continue + if "}{" in text: + chunks = utils.split_concatenated_json(text) + if len(chunks) == 1: + try: + return json.loads(chunks[0]) + except json.JSONDecodeError: + pass + elif len(chunks) > 1: + parsed = [] + for chunk in chunks: + try: + parsed.append(json.loads(chunk)) + except json.JSONDecodeError: + parsed = None + break + if parsed is not None: + return parsed + if len(text) >= 8: + coerced = try_join_char_split_json_array(list(text)) + if coerced is not None: + return coerced + return None + + +def try_join_char_split_json_array(items: list) -> list | None: + """ + Some local models emit a JSON array as one string per character in tool args. + + Example: tasks=["[", "{", "\\"", "t", "a", "s", "k", "\\"", ...] instead of + tasks='[{"task": "...", "done": false}]'. + """ + if len(items) < 8: + return None + # Quick check: the first item must be the opening bracket of a JSON construct. + # This avoids O(n) string-joining for legitimate string lists like + # ["file_a.py", "file_b.py", ...] where the first item isn't JSON-like. 
+ first = items[0] + if first not in ("[", "{"): + return None + + if not all(isinstance(x, str) for x in items): + return None + + joined = "".join(items).strip() + if not joined.startswith(("[", "{")): + return None + try: + parsed = json.loads(joined) + except json.JSONDecodeError: + return None + if isinstance(parsed, dict): + return [parsed] + if isinstance(parsed, list): + return parsed + return None + + +def _repair_local_model_json_text(text: str) -> str: + """ + Repair common local-model breakage in double-encoded tool JSON. + + Models sometimes emit a literal newline between ``:`` and the opening quote + of a string value (e.g. ``"end_text":\\n",`` instead of ``"end_text": "",``). + """ + repaired = re.sub(r':\s*\n\s*",', ': "",', text) + repaired = re.sub(r':\s*\n\s*"}', ': ""}', repaired) + return repaired + + +def _parse_bracket_arguments(payload_str: str) -> dict: + """Parse multiple arguments from a bracket-style payload. + + Uses depth-aware scanning instead of regex-based boundary detection, + so that ``=`` signs nested inside JSON strings, arrays, or objects + are not mistaken for argument separators. 
+ + Example: ``show=[...], verbose=true, mode="strict"`` + """ + arguments: dict = {} + i = 0 + n = len(payload_str) + + while i < n: + # Skip whitespace and inter-argument commas + while i < n and payload_str[i] in " ,\t": + i += 1 + if i >= n: + break + + # Extract key name (alphanumeric, underscore, hyphen) + key_start = i + while i < n and (payload_str[i].isalnum() or payload_str[i] in "_-"): + i += 1 + key = payload_str[key_start:i] + + if not key: + i += 1 + continue + + # Skip whitespace before '=' + while i < n and payload_str[i] in " \t": + i += 1 + if i >= n or payload_str[i] != "=": + i += 1 + continue + i += 1 # skip '=' + + # Skip whitespace before value + while i < n and payload_str[i] in " \t": + i += 1 + + # Extract value with depth tracking + value_start = i + depth_paren = 0 # () + depth_brace = 0 # {} + depth_bracket = 0 # [] + in_dquote = False + in_squote = False + + while i < n: + ch = payload_str[i] + + if in_dquote: + if ch == "\\": + i += 2 # skip escaped character + continue + if ch == '"': + in_dquote = False + elif in_squote: + if ch == "\\": + i += 2 + continue + if ch == "'": + in_squote = False + else: + if ch == '"': + in_dquote = True + elif ch == "'": + in_squote = True + elif ch == "(": + depth_paren += 1 + elif ch == ")": + depth_paren -= 1 + elif ch == "{": + depth_brace += 1 + elif ch == "}": + depth_brace -= 1 + elif ch == "[": + depth_bracket += 1 + elif ch == "]": + depth_bracket -= 1 + elif ch == ",": + # Comma at depth 0 = next argument separator + if depth_paren == 0 and depth_brace == 0 and depth_bracket == 0: + break + + i += 1 + + val_str = payload_str[value_start:i].strip() + # Drop trailing comma left by the break + if val_str.endswith(","): + val_str = val_str[:-1].strip() + + # Try to parse the value as native JSON + try: + arguments[key] = json.loads(val_str) + except (json.JSONDecodeError, ValueError): + # Fallback if it is unquoted plain text + arguments[key] = val_str + + return arguments diff --git 
a/cecli/tools/delegate.py b/cecli/tools/delegate.py index 660db6c276d..1fa6a5313ff 100644 --- a/cecli/tools/delegate.py +++ b/cecli/tools/delegate.py @@ -10,6 +10,7 @@ class Tool(BaseTool): NORM_NAME = "delegate" TRACK_INVOCATIONS = True + LIST_PARAMS = ["delegations"] SCHEMA = { "type": "function", "function": { diff --git a/cecli/tools/edit_text.py b/cecli/tools/edit_text.py index df6c8fc8c56..8f5ed549322 100644 --- a/cecli/tools/edit_text.py +++ b/cecli/tools/edit_text.py @@ -27,6 +27,7 @@ class Tool(BaseTool): NORM_NAME = "edittext" TRACK_INVOCATIONS = False + LIST_PARAMS = ["edits"] SCHEMA = { "type": "function", "function": { diff --git a/cecli/tools/explore_code.py b/cecli/tools/explore_code.py index dbe12c60377..c95f8df8acd 100644 --- a/cecli/tools/explore_code.py +++ b/cecli/tools/explore_code.py @@ -19,6 +19,7 @@ class Tool(BaseTool): NORM_NAME = "explorecode" + LIST_PARAMS = ["queries"] SCHEMA = { "type": "function", "function": { diff --git a/cecli/tools/grep.py b/cecli/tools/grep.py index 03f51d57275..deb9db27d60 100644 --- a/cecli/tools/grep.py +++ b/cecli/tools/grep.py @@ -12,6 +12,7 @@ class Tool(BaseTool): NORM_NAME = "grep" + LIST_PARAMS = ["searches"] SCHEMA = { "type": "function", "function": { diff --git a/cecli/tools/read_range.py b/cecli/tools/read_range.py index a9eaab3abfc..fe286b69645 100644 --- a/cecli/tools/read_range.py +++ b/cecli/tools/read_range.py @@ -16,6 +16,7 @@ class Tool(BaseTool): NORM_NAME = "readrange" TRACK_INVOCATIONS = False + LIST_PARAMS = ["show"] SCHEMA = { "type": "function", "function": { diff --git a/cecli/tools/update_todo_list.py b/cecli/tools/update_todo_list.py index 223c85256a1..8d9395b22e7 100644 --- a/cecli/tools/update_todo_list.py +++ b/cecli/tools/update_todo_list.py @@ -5,6 +5,7 @@ class Tool(BaseTool): NORM_NAME = "updatetodolist" + LIST_PARAMS = ["tasks"] SCHEMA = { "type": "function", "function": { diff --git a/cecli/tools/utils/base_tool.py b/cecli/tools/utils/base_tool.py index 
fa7e33c5758..2ed174594d2 100644 --- a/cecli/tools/utils/base_tool.py +++ b/cecli/tools/utils/base_tool.py @@ -1,6 +1,6 @@ from abc import ABC, abstractmethod -from cecli.tools.utils.helpers import handle_tool_error +from cecli.tools.utils.helpers import handle_tool_error, normalize_json_array from cecli.tools.utils.output import print_tool_response @@ -12,6 +12,9 @@ class BaseTool(ABC): NORM_NAME = None SCHEMA = None + # Parameters to run normalization checks on + LIST_PARAMS = [] + # Invocation tracking for detecting repeated tool calls _invocations = {} # Dict to store last 3 invocations per tool _invocation_summary = set() # Set to track distinct tool names @@ -119,6 +122,10 @@ def process_response(cls, coder, params): coder, tool_name, ValueError(error_msg), add_traceback=False ) + for param in cls.LIST_PARAMS: + if param in params: + params[param] = normalize_json_array(params[param], param_name=param) + # Add current invocation to history (keeping only last 3) if params: cls._invocations[tool_name].append((current_params_tuple, params)) diff --git a/cecli/tools/utils/helpers.py b/cecli/tools/utils/helpers.py index d3c219383bb..f05e2eda8f9 100644 --- a/cecli/tools/utils/helpers.py +++ b/cecli/tools/utils/helpers.py @@ -1,8 +1,11 @@ import difflib +import json import os import re import traceback +from cecli.helpers import responses + class ToolError(Exception): """Custom exception for tool-specific errors that should be reported to the LLM.""" @@ -338,6 +341,48 @@ def format_tool_result( return result_for_llm +def normalize_json_array(value, *, param_name: str = "items", allow_empty: bool = False) -> list: + """ + Coerce tool args that should be arrays but sometimes arrive as JSON strings. + + Local models occasionally double-encode array parameters as JSON text, or emit + arrays as per-character string lists (see ``try_join_char_split_json_array``). 
+ """ + if isinstance(value, list): + coerced = responses.try_join_char_split_json_array(value) + if coerced is not None: + value = coerced + elif len(value) == 1 and isinstance(value[0], str): + # Single element wrapping the whole JSON array/object as a string. + if value[0].strip().startswith(("[", "{", '"')): + value = value[0] + + if isinstance(value, str): + text = value.strip() + if not text: + if allow_empty: + return [] + raise ToolError(f"{param_name} array cannot be empty") + parsed = responses.try_parse_json_value(text) + if parsed is None: + try: + parsed = json.loads(text) + except json.JSONDecodeError as err: + raise ToolError(f"Invalid {param_name} parameter JSON: {err}") from err + value = parsed + + if isinstance(value, dict): + value = [value] + + if not isinstance(value, list): + raise ToolError(f"{param_name} must be an array, got {type(value).__name__}") + + if len(value) == 0 and not allow_empty: + raise ToolError(f"{param_name} array cannot be empty") + + return value + + # Example usage within a hypothetical tool: # try: # abs_path, rel_path, original_content = validate_file_for_edit(coder, file_path) diff --git a/tests/tools/test_extractions.py b/tests/tools/test_extractions.py new file mode 100644 index 00000000000..a3b51439202 --- /dev/null +++ b/tests/tools/test_extractions.py @@ -0,0 +1,406 @@ +"""Tests for all 3 extract_tools_from_content_* methods in cecli.helpers.responses.""" + +import json + +from cecli.helpers.responses import ( + _parse_bracket_arguments, + extract_tools_from_content_json, + extract_tools_from_content_xml, + extract_tools_from_pseudo_json, +) + +# ============================================================================= +# extract_tools_from_content_json +# ============================================================================= + + +def test_json_single_tool_call_with_arguments_key(): + """Standard tool call with 'arguments' key should be extracted.""" + content = '{"name": "ls", "arguments": {"path": 
"."}}' + result = extract_tools_from_content_json(content) + assert result is not None + assert len(result) == 1 + assert result[0].function.name == "ls" + assert json.loads(result[0].function.arguments) == {"path": "."} + + +def test_json_single_tool_call_with_parameters_key(): + """Tool call with 'parameters' key should be extracted.""" + content = '{"name": "read_file", "parameters": {"file_path": "/tmp/test.txt"}}' + result = extract_tools_from_content_json(content) + assert result is not None + assert len(result) == 1 + assert result[0].function.name == "read_file" + assert json.loads(result[0].function.arguments) == {"file_path": "/tmp/test.txt"} + + +def test_json_single_tool_call_with_params_key(): + """Tool call with 'params' key should be extracted.""" + content = '{"name": "search", "params": {"query": "hello"}}' + result = extract_tools_from_content_json(content) + assert result is not None + assert len(result) == 1 + assert result[0].function.name == "search" + assert json.loads(result[0].function.arguments) == {"query": "hello"} + + +def test_json_array_of_tool_calls(): + """A JSON array of tool call objects should all be extracted.""" + content = ( + "[" + '{"name": "ls", "arguments": {"path": "."}},' + '{"name": "grep", "arguments": {"pattern": "test"}}' + "]" + ) + result = extract_tools_from_content_json(content) + assert result is not None + assert len(result) == 2 + assert result[0].function.name == "ls" + assert result[1].function.name == "grep" + + +def test_json_empty_content_returns_none(): + """Empty content should return None.""" + assert extract_tools_from_content_json("") is None + + +def test_json_no_braces_returns_none(): + """Content without braces or brackets should return None.""" + assert extract_tools_from_content_json("plain text") is None + + +def test_json_missing_required_keys_returns_none(): + """JSON without 'name' and arg keys should not be extracted.""" + content = '{"foo": "bar", "baz": 42}' + assert 
extract_tools_from_content_json(content) is None + + +def test_json_malformed_json_returns_none(): + """Malformed JSON should return None.""" + content = '{"name": "ls", "arguments": }' + assert extract_tools_from_content_json(content) is None + + +def test_json_with_string_arguments(): + """Tool call where arguments is a string (not dict/list) should work.""" + content = '{"name": "echo", "arguments": "hello world"}' + result = extract_tools_from_content_json(content) + assert result is not None + assert len(result) == 1 + assert result[0].function.name == "echo" + # String arguments get serialised as a JSON string + assert result[0].function.arguments == "hello world" + + +def test_json_tool_with_nested_arguments(): + """Tool call with deeply nested arguments should work.""" + content = ( + '{"name": "ReadRange", "arguments": {' + '"show": [{"file_path": "test.py", "start_text": "hello"}]' + "}}" + ) + result = extract_tools_from_content_json(content) + assert result is not None + assert len(result) == 1 + assert result[0].function.name == "ReadRange" + args = json.loads(result[0].function.arguments) + assert args["show"][0]["file_path"] == "test.py" + + +def test_json_multiple_tool_calls_in_list_first_not_valid(): + """Array where first item is not a valid tool call should skip it.""" + content = ( + "[" '{"irrelevant": true},' '{"name": "actual_tool", "arguments": {"key": "value"}}' "]" + ) + result = extract_tools_from_content_json(content) + assert result is not None + assert len(result) == 1 + assert result[0].function.name == "actual_tool" + + +def test_json_all_invalid_array_items_returns_none(): + """Array where ALL items are invalid tool calls should return None.""" + content = "[" '{"irrelevant": true},' '{"also_invalid": [1, 2, 3]}' "]" + result = extract_tools_from_content_json(content) + assert result is None + + +# ============================================================================= +# extract_tools_from_content_xml +# 
============================================================================= + + +def test_xml_single_tool_call(): + """Basic XML-style tool call should be extracted.""" + content = ( + "" + "" + '[{"task": "Update task list", "done": false}]' + "" + "" + ) + result = extract_tools_from_content_xml(content) + assert result is not None + assert len(result) == 1 + assert result[0].function.name == "UpdateTodoList" + args = json.loads(result[0].function.arguments) + assert "tasks" in args + assert args["tasks"][0]["task"] == "Update task list" + + +def test_xml_multiple_parameters(): + """Tool call with multiple parameters should work.""" + content = ( + "" + "" + '"test.py"' + "" + "" + '"hello"' + "" + "" + ) + result = extract_tools_from_content_xml(content) + assert result is not None + assert len(result) == 1 + assert result[0].function.name == "ReadRange" + args = json.loads(result[0].function.arguments) + assert args["file_path"] == "test.py" + assert args["start_text"] == "hello" + + +def test_xml_multiple_tool_calls(): + """Multiple XML tool calls in content should all be extracted.""" + content = ( + "Some text " + "" + "" + '"val1"' + "" + "" + " more text " + "" + "42" + "" + ) + result = extract_tools_from_content_xml(content) + assert result is not None + assert len(result) == 2 + assert result[0].function.name == "ToolA" + assert result[1].function.name == "ToolB" + args_b = json.loads(result[1].function.arguments) + assert args_b["count"] == 42 + + +def test_xml_empty_content_returns_none(): + """Empty content should return None.""" + assert extract_tools_from_content_xml("") is None + + +def test_xml_no_function_tags_returns_none(): + """Content without tags should return None.""" + assert extract_tools_from_content_xml("plain text") is None + + +def test_xml_parameter_with_array_value(): + """Parameter with a JSON array value should parse correctly.""" + content = "" "" "[1, 2, 3, 4]" "" "" + result = extract_tools_from_content_xml(content) + assert 
result is not None + assert len(result) == 1 + args = json.loads(result[0].function.arguments) + assert args["items"] == [1, 2, 3, 4] + + +def test_xml_parameter_with_string_fallback(): + """Non-JSON parameter value should fall back to raw string.""" + content = ( + "" + "" + "just some plain text" + "" + "" + ) + result = extract_tools_from_content_xml(content) + assert result is not None + assert len(result) == 1 + args = json.loads(result[0].function.arguments) + assert args["note"] == "just some plain text" + + +def test_xml_nested_in_text(): + """XML tool call embedded in surrounding text should be extracted.""" + content = ( + "I will use the UpdateTodoList tool.\n" + "" + "" + '[{"task": "test", "done": false}]' + "" + "" + "\nThat should update the list." + ) + result = extract_tools_from_content_xml(content) + assert result is not None + assert len(result) == 1 + assert result[0].function.name == "UpdateTodoList" + + +# ============================================================================= +# extract_tools_from_pseudo_json +# ============================================================================= + + +def test_pseudo_single_tool_with_array_arg(): + """Bracket format with a JSON array argument should be extracted.""" + content = '[Local--ReadRange(show=[{"file_path": "test.py", ' '"start_text": "def foo"}])]' + result = extract_tools_from_pseudo_json(content) + assert result is not None + assert len(result) == 1 + assert result[0].function.name == "Local--ReadRange" + args = json.loads(result[0].function.arguments) + assert args["show"][0]["file_path"] == "test.py" + + +def test_pseudo_multiple_args_with_different_types(): + """Multiple args with boolean, string, and array values.""" + content = ( + '[Local--ReadRange(show=[{"file_path": "test.py", ' + '"start_text": "class A"}], verbose=true, mode="strict")]' + ) + result = extract_tools_from_pseudo_json(content) + assert result is not None + assert len(result) == 1 + assert 
result[0].function.name == "Local--ReadRange" + args = json.loads(result[0].function.arguments) + assert args["verbose"] is True + assert args["mode"] == "strict" + assert len(args["show"]) == 1 + + +def test_pseudo_multiple_tool_blocks(): + """Multiple bracket tool blocks in content should all be extracted.""" + content = 'First [ToolA(arg1="val1")] and ' "then [ToolB(count=42, flag=true)]" + result = extract_tools_from_pseudo_json(content) + assert result is not None + assert len(result) == 2 + assert result[0].function.name == "ToolA" + assert result[1].function.name == "ToolB" + args_b = json.loads(result[1].function.arguments) + assert args_b["count"] == 42 + assert args_b["flag"] is True + + +def test_pseudo_empty_content_returns_none(): + """Empty content should return None.""" + assert extract_tools_from_pseudo_json("") is None + + +def test_pseudo_no_brackets_returns_none(): + """Content without brackets should return None.""" + assert extract_tools_from_pseudo_json("plain text") is None + + +def test_pseudo_nested_parentheses(): + """Values with nested parentheses should be handled correctly.""" + content = '[DeepNest(calc="((1+2)*3)", name="test")]' + result = extract_tools_from_pseudo_json(content) + assert result is not None + assert len(result) == 1 + assert result[0].function.name == "DeepNest" + args = json.loads(result[0].function.arguments) + assert args["calc"] == "((1+2)*3)" + assert args["name"] == "test" + + +def test_pseudo_incomplete_bracket_no_match(): + """Missing closing bracket should not be extracted.""" + content = '[ToolA(arg1="val1"' + assert extract_tools_from_pseudo_json(content) is None + + +def test_pseudo_missing_closing_paren(): + """Missing closing parenthesis should skip the block.""" + content = '[ToolA(arg1="val1") more text' + assert extract_tools_from_pseudo_json(content) is None + + +def test_pseudo_tool_in_surrounding_text(): + """Bracket tool call embedded in text should be extracted.""" + content = ( + "I will use the 
Local--ReadRange tool:\n" + '[Local--ReadRange(show=[{"file_path": "test.py"}])]' + "\nThat should read the file." + ) + result = extract_tools_from_pseudo_json(content) + assert result is not None + assert len(result) == 1 + assert result[0].function.name == "Local--ReadRange" + + +def test_pseudo_numeric_and_null_values(): + """Numeric and null values should parse correctly.""" + content = "[MathTool(x=42, y=3.14, optional=null)]" + result = extract_tools_from_pseudo_json(content) + assert result is not None + assert len(result) == 1 + args = json.loads(result[0].function.arguments) + assert args["x"] == 42 + assert args["y"] == 3.14 + assert args["optional"] is None + + +# ============================================================================= +# _parse_bracket_arguments (helper) +# ============================================================================= + + +def test_parse_bracket_arguments_single(): + """Single argument should parse correctly.""" + payload = 'show=[{"file_path": "test.py"}]' + result = _parse_bracket_arguments(payload) + assert result["show"] == [{"file_path": "test.py"}] + + +def test_parse_bracket_arguments_multiple(): + """Multiple arguments of mixed types should parse correctly.""" + payload = 'show=[{"file_path": "test.py"}], verbose=true, mode="strict"' + result = _parse_bracket_arguments(payload) + assert len(result) == 3 + assert result["verbose"] is True + assert result["mode"] == "strict" + assert result["show"] == [{"file_path": "test.py"}] + + +def test_parse_bracket_arguments_empty(): + """Empty payload should return empty dict.""" + assert _parse_bracket_arguments("") == {} + + +def test_parse_bracket_arguments_numbers_and_null(): + """Numbers and null values should be parsed correctly.""" + payload = "x=42, y=3.14, z=null" + result = _parse_bracket_arguments(payload) + assert result["x"] == 42 + assert result["y"] == 3.14 + assert result["z"] is None + + +def test_parse_bracket_arguments_escaped_quotes(): + """Values 
with escaped double quotes inside strings should parse correctly.""" + payload = 'arg="value with \\"escaped quote\\" inside"' + result = _parse_bracket_arguments(payload) + assert result["arg"] == 'value with "escaped quote" inside' + + +def test_parse_bracket_arguments_escaped_backslash(): + """Values with escaped backslashes inside strings should parse correctly.""" + payload = 'path="C:\\\\Users\\\\test"' + result = _parse_bracket_arguments(payload) + assert result["path"] == "C:\\Users\\test" + + +def test_parse_bracket_arguments_single_quoted_string(): + """Single-quoted string values retain their quotes (JSON doesn't parse single quotes).""" + payload = "name='hello world'" + result = _parse_bracket_arguments(payload) + assert result["name"] == "'hello world'" diff --git a/tests/tools/test_tool_arguments.py b/tests/tools/test_tool_arguments.py new file mode 100644 index 00000000000..c06ff1d5d83 --- /dev/null +++ b/tests/tools/test_tool_arguments.py @@ -0,0 +1,298 @@ +"""Glued local-model tool JSON argument parsing.""" + +import json +from types import SimpleNamespace +from unittest.mock import Mock + +import pytest + +from cecli.coders.base_coder import Coder +from cecli.helpers.responses import ( + _repair_local_model_json_text, + extract_tools_from_content_json, + merge_glued_json_objects, + parse_tool_arguments, + try_join_char_split_json_array, + try_parse_json_value, +) +from cecli.tools.grep import Tool as GrepTool +from cecli.tools.utils.helpers import ToolError, normalize_json_array + + +def test_parse_tool_arguments_merges_glued_objects_with_empty_fragments(): + raw = '{"limit": 15}{}{"path": "."}' + assert parse_tool_arguments(raw) == {"limit": 15, "path": "."} + + +def test_parse_tool_arguments_merges_grep_style_glued_args(): + raw = ( + '{"limit": 15}{}{"searches": [{"file_pattern": "*.md", ' + '"pattern": "TODO|FIXME", "use_regex": true}]}' + ) + out = parse_tool_arguments(raw) + assert out["limit"] == 15 + assert out["searches"][0]["pattern"] 
== "TODO|FIXME" + + +def test_merge_glued_returns_none_for_non_object_chunks(): + assert merge_glued_json_objects(['["a"]', '{"b": 1}']) is None + + +def test_merge_glued_all_empty_chunks_returns_dict(): + """All-empty chunks should return an empty dict (no non-empty content to merge).""" + result = merge_glued_json_objects(["{}", "{}"]) + assert result is not None + assert result == {} + + +def test_merge_glued_single_empty_chunk_returns_dict(): + """A single empty object chunk should return an empty dict.""" + result = merge_glued_json_objects(["{}"]) + assert result is not None + assert result == {} + + +def test_merge_glued_empty_string_chunks_returns_empty_dict(): + """Chunks that are empty strings are skipped, returning an empty merged dict.""" + result = merge_glued_json_objects(["", "", ""]) + # Empty strings are stripped to empty and skipped, leaving merged == {} -> returns {} + assert result == {} + + +def test_expand_concatenated_json_merges_instead_of_splitting(monkeypatch): + """Dogfood: DeepSeek ``{…}{}{…}`` must not become three tool calls.""" + + class MiniCoder(Coder): + def __init__(self): + pass + + coder = MiniCoder.__new__(MiniCoder) + tool_call = SimpleNamespace( + id="call-1", + function=SimpleNamespace( + name="ls", + arguments='{"limit": 15}{}{"path": "."}', + ), + ) + expanded = coder._expand_concatenated_json([tool_call]) + assert len(expanded) == 1 + assert json.loads(expanded[0].function.arguments) == {"limit": 15, "path": "."} + assert expanded[0].id == "call-1" + + +def test_grep_format_output_empty_searches_does_not_crash_tool_footer(): + coder = SimpleNamespace( + io=SimpleNamespace(tool_error=Mock(), tool_output=Mock(), tool_warning=Mock()), + verbose=False, + pretty=False, + tui=lambda: None, + ) + tool_response = SimpleNamespace( + function=SimpleNamespace( + name="Grep", + arguments='{"limit": 15}{}{"searches": []}', + ), + ) + GrepTool.format_output( + coder, + mcp_server=SimpleNamespace(name="Local"), + 
tool_response=tool_response, + ) + assert coder.io.tool_error.called + + +def test_try_join_char_split_json_array_reconstructs_array(): + """Char-split JSON array should be joined back into a proper list.""" + items = ["[", "{", '"', "t", "a", "s", "k", '"', ":", " ", '"', "x", '"', "}", "]"] + result = try_join_char_split_json_array(items) + assert result == [{"task": "x"}] + + +def test_try_join_char_split_json_array_reconstructs_dict(): + """Char-split JSON object should be joined and wrapped in a list.""" + items = ["{", '"', "a", '"', ":", " ", "1", "}"] + result = try_join_char_split_json_array(items) + assert result == [{"a": 1}] + + +def test_try_join_char_split_json_array_too_few_items(): + """Less than 8 items should return None.""" + assert try_join_char_split_json_array(["{", "}"]) is None + + +def test_try_join_char_split_json_array_non_string_items(): + """Non-string items should return None.""" + assert try_join_char_split_json_array(["[", 1, "{", "}"]) is None + + +def test_try_join_char_split_json_array_does_not_start_with_brace(): + """Joined string not starting with [ or { should return None.""" + items = list('"hello"') # A JSON string, not object/array + assert try_join_char_split_json_array(items) is None + + +def test_try_join_char_split_json_array_invalid_json(): + """Joined string that is invalid JSON should return None.""" + items = ["{", '"', "a", '"', ":", " ", "b", "r", "o", "k", "e", "n", "}"] + assert try_join_char_split_json_array(items) is None + + +def test_repair_local_model_json_text_newline_before_quote(): + """Repair colon-newline-quote breakage inside JSON.""" + broken = '"end_text":\n",' + fixed = _repair_local_model_json_text(broken) + assert '"end_text": "",' in fixed + + +def test_repair_local_model_json_text_newline_before_closing_brace(): + """Repair colon-newline-quote before closing brace.""" + broken = '"end_text":\n"}' + fixed = _repair_local_model_json_text(broken) + assert '"end_text": ""}' in fixed + + +def 
test_repair_local_model_json_text_valid_json_passes_through(): + """Valid JSON text should remain unchanged.""" + valid = '{"limit": 15, "path": "."}' + assert _repair_local_model_json_text(valid) == valid + + +def test_normalize_json_array_actual_list_passthrough(): + """A proper list should pass through unchanged.""" + data = [{"pattern": "foo"}, {"pattern": "bar"}] + result = normalize_json_array(data, param_name="searches") + assert result == data + + +def test_normalize_json_array_char_split_input(): + """Char-split list should be joined back into a proper array.""" + items = ["[", "{", '"', "t", "a", "s", "k", '"', ":", " ", '"', "x", '"', "}", "]"] + result = normalize_json_array(items, param_name="delegations") + assert result == [{"task": "x"}] + + +def test_normalize_json_array_json_string_wrapping_array(): + """A JSON string containing an array should be parsed.""" + result = normalize_json_array('[{"a": 1}, {"b": 2}]', param_name="edits") + assert result == [{"a": 1}, {"b": 2}] + + +def test_normalize_json_array_json_string_wrapping_dict(): + """A JSON string containing a dict should be wrapped in a list.""" + result = normalize_json_array('{"task": "hello"}', param_name="tasks") + assert result == [{"task": "hello"}] + + +def test_normalize_json_array_empty_string_raises_tool_error(): + """An empty string should raise ToolError.""" + with pytest.raises(ToolError, match="array cannot be empty"): + normalize_json_array("", param_name="items") + + +def test_normalize_json_array_empty_string_with_allow_empty(): + """An empty string with allow_empty=True should return empty list.""" + assert normalize_json_array("", param_name="items", allow_empty=True) == [] + + +def test_normalize_json_array_invalid_json_string_raises_tool_error(): + """An invalid JSON string should raise ToolError.""" + with pytest.raises(ToolError, match="Invalid.*parameter JSON"): + normalize_json_array("{broken", param_name="items") + + +def 
test_normalize_json_array_dict_input_wraps_in_list(): + """A bare dict should be wrapped in a list.""" + result = normalize_json_array({"task": "hello"}, param_name="tasks") + assert result == [{"task": "hello"}] + + +def test_normalize_json_array_non_list_non_dict_raises(): + """A non-list, non-dict, non-string value should raise ToolError.""" + with pytest.raises(ToolError, match="must be an array"): + normalize_json_array(42, param_name="items") + + +def test_normalize_json_array_empty_list_without_allow_empty_raises(): + """An empty list without allow_empty should raise ToolError.""" + with pytest.raises(ToolError, match="array cannot be empty"): + normalize_json_array([], param_name="items") + + +def test_normalize_json_array_empty_list_with_allow_empty(): + """An empty list with allow_empty=True should pass through.""" + assert normalize_json_array([], param_name="items", allow_empty=True) == [] + + +def test_extract_tools_from_content_json_with_arguments_key(): + """Standard tool calls with 'arguments' key should be extracted.""" + content = '{"name": "ls", "arguments": {"path": "."}}' + result = extract_tools_from_content_json(content) + assert result is not None + assert len(result) == 1 + assert result[0].function.name == "ls" + assert json.loads(result[0].function.arguments) == {"path": "."} + + +def test_extract_tools_from_content_json_with_parameters_key(): + """Tool calls with 'parameters' key should be extracted.""" + content = '{"name": "ls", "parameters": {"path": "."}}' + result = extract_tools_from_content_json(content) + assert result is not None + assert len(result) == 1 + assert result[0].function.name == "ls" + assert json.loads(result[0].function.arguments) == {"path": "."} + + +def test_extract_tools_from_content_json_with_params_key(): + """Tool calls with 'params' key should be extracted.""" + content = '{"name": "ls", "params": {"path": "."}}' + result = extract_tools_from_content_json(content) + assert result is not None + assert 
len(result) == 1 + assert result[0].function.name == "ls" + assert json.loads(result[0].function.arguments) == {"path": "."} + + +def test_extract_tools_from_content_json_array_with_parameters(): + """Array of tool calls with 'parameters' key should be extracted.""" + content = ( + '[{"name": "ls", "parameters": {"path": "."}},' + ' {"name": "grep", "parameters": {"pattern": "foo"}}]' + ) + result = extract_tools_from_content_json(content) + assert result is not None + assert len(result) == 2 + assert result[0].function.name == "ls" + assert result[1].function.name == "grep" + assert json.loads(result[1].function.arguments) == {"pattern": "foo"} + + +def test_parse_tool_arguments_truly_unparseable(): + """Truly unparsable JSON should return a dict carrying an '@error' message.""" + inp = "this is not json at all!!!" + result = parse_tool_arguments(inp) + assert "@error" in result + assert "Malformed JSON arguments" in result["@error"] + + +def test_parse_tool_arguments_empty_string(): + """Empty string should return an empty dict.""" + assert parse_tool_arguments("") == {} + assert parse_tool_arguments(" ") == {} + assert parse_tool_arguments(None) == {} + + +def test_try_parse_json_value_empty_text(): + """Empty text should return None.""" + assert try_parse_json_value("") is None + assert try_parse_json_value(" ") is None + + +def test_parse_tool_arguments_uneven_glued_objects_with_list(): + """Glued objects where one chunk is a list should not merge (fallback).""" + # The merge fails on the list chunk, so an '@error' dict is returned rather than {} + inp = '{"a": 1}{"b": 2}["c"]' + result = parse_tool_arguments(inp) + # The function tries to parse, failing on the mixed glued content + assert "@error" in result + assert "Could not merge glued JSON objects" in result["@error"] From cb8b714d52537759ed7391d6b61447cbb4aeecad Mon Sep 17 00:00:00 2001 From: Your Name Date: Sun, 31 May 2026 15:54:35 -0400 Subject: [PATCH 25/37] Skills should be loadable by subagents --- cecli/commands/exclude_skill.py | 2 +-
cecli/commands/include_skill.py | 2 +- cecli/commands/load_skill.py | 3 ++- cecli/commands/remove_skill.py | 2 +- cecli/helpers/skills.py | 20 ++++++++++++++++---- cecli/tools/load_skill.py | 2 +- cecli/tools/remove_skill.py | 2 +- 7 files changed, 23 insertions(+), 10 deletions(-) diff --git a/cecli/commands/exclude_skill.py b/cecli/commands/exclude_skill.py index 086e967678a..b50ceb00044 100644 --- a/cecli/commands/exclude_skill.py +++ b/cecli/commands/exclude_skill.py @@ -19,7 +19,7 @@ async def execute(cls, io, coder, args, **kwargs): skill_names = args.strip().split() # Check if we're in agent mode - if not hasattr(coder, "edit_format") or coder.edit_format != "agent": + if not hasattr(coder, "edit_format") or coder.edit_format not in ("agent", "subagent"): io.tool_output("Skill exclusion is only available in agent mode.") return format_command_result( io, "exclude-skill", "Skill exclusion is only available in agent mode" diff --git a/cecli/commands/include_skill.py b/cecli/commands/include_skill.py index 754ccdf2dd2..6fa39ac3732 100644 --- a/cecli/commands/include_skill.py +++ b/cecli/commands/include_skill.py @@ -19,7 +19,7 @@ async def execute(cls, io, coder, args, **kwargs): skill_names = args.strip().split() # Check if we're in agent mode - if not hasattr(coder, "edit_format") or coder.edit_format != "agent": + if not hasattr(coder, "edit_format") or coder.edit_format not in ("agent", "subagent"): io.tool_output("Skill inclusion is only available in agent mode.") return format_command_result( io, "include-skill", "Skill inclusion is only available in agent mode" diff --git a/cecli/commands/load_skill.py b/cecli/commands/load_skill.py index 33056214f4a..328f56653f9 100644 --- a/cecli/commands/load_skill.py +++ b/cecli/commands/load_skill.py @@ -19,7 +19,8 @@ async def execute(cls, io, coder, args, **kwargs): skill_names = args.strip().split() # Check if we're in agent mode - if not hasattr(coder, "edit_format") or coder.edit_format != "agent": + 
io.tool_output(coder.edit_format) + if not hasattr(coder, "edit_format") or coder.edit_format not in ("agent", "subagent"): io.tool_output("Skill loading is only available in agent mode.") return format_command_result( io, "load-skill", "Skill loading is only available in agent mode" diff --git a/cecli/commands/remove_skill.py b/cecli/commands/remove_skill.py index 35afe8f5e42..81c684f7dd3 100644 --- a/cecli/commands/remove_skill.py +++ b/cecli/commands/remove_skill.py @@ -19,7 +19,7 @@ async def execute(cls, io, coder, args, **kwargs): skill_names = args.strip().split() # Check if we're in agent mode - if not hasattr(coder, "edit_format") or coder.edit_format != "agent": + if not hasattr(coder, "edit_format") or coder.edit_format not in ("agent", "subagent"): io.tool_output("Skill removal is only available in agent mode.") return format_command_result( io, "remove-skill", "Skill removal is only available in agent mode" diff --git a/cecli/helpers/skills.py b/cecli/helpers/skills.py index bfb0f469a0c..40bf2a3bd45 100644 --- a/cecli/helpers/skills.py +++ b/cecli/helpers/skills.py @@ -449,7 +449,10 @@ def load_skill(self, skill_name: str) -> str: return "Error: Skills manager not connected to a coder instance." # Check if we're in agent mode - if not hasattr(self.coder, "edit_format") or self.coder.edit_format != "agent": + if not hasattr(self.coder, "edit_format") or self.coder.edit_format not in ( + "agent", + "subagent", + ): return "Error: Skill loading is only available in agent mode." # Check if skill is already loaded @@ -498,7 +501,10 @@ def remove_skill(self, skill_name: str) -> str: return "Error: Skills manager not connected to a coder instance." # Check if we're in agent mode - if not hasattr(self.coder, "edit_format") or self.coder.edit_format != "agent": + if not hasattr(self.coder, "edit_format") or self.coder.edit_format not in ( + "agent", + "subagent", + ): return "Error: Skill removal is only available in agent mode." 
# Check if skill is already removed @@ -532,7 +538,10 @@ def include_skill(self, skill_name: str) -> str: return "Error: Skills manager not connected to a coder instance." # Check if we're in agent mode - if not hasattr(self.coder, "edit_format") or self.coder.edit_format != "agent": + if not hasattr(self.coder, "edit_format") or self.coder.edit_format not in ( + "agent", + "subagent", + ): return "Error: Skill inclusion is only available in agent mode." # Find the skill to verify it exists @@ -590,7 +599,10 @@ def exclude_skill(self, skill_name: str) -> str: return "Error: Skills manager not connected to a coder instance." # Check if we're in agent mode - if not hasattr(self.coder, "edit_format") or self.coder.edit_format != "agent": + if not hasattr(self.coder, "edit_format") or self.coder.edit_format not in ( + "agent", + "subagent", + ): return "Error: Skill exclusion is only available in agent mode." # Find the skill to verify it exists diff --git a/cecli/tools/load_skill.py b/cecli/tools/load_skill.py index 15f620579fb..f59beea940f 100644 --- a/cecli/tools/load_skill.py +++ b/cecli/tools/load_skill.py @@ -30,7 +30,7 @@ def execute(cls, coder, skill_name, **kwargs): return "Error: Skill name is required." # Check if we're in agent mode - if not hasattr(coder, "edit_format") or coder.edit_format != "agent": + if not hasattr(coder, "edit_format") or coder.edit_format not in ("agent", "subagent"): return "Error: Skill loading is only available in agent mode." # Check if skills_manager is available diff --git a/cecli/tools/remove_skill.py b/cecli/tools/remove_skill.py index 70afb02ebfd..a4e0e72eed9 100644 --- a/cecli/tools/remove_skill.py +++ b/cecli/tools/remove_skill.py @@ -30,7 +30,7 @@ def execute(cls, coder, skill_name, **kwargs): return "Error: Skill name is required." 
# Check if we're in agent mode - if not hasattr(coder, "edit_format") or coder.edit_format != "agent": + if not hasattr(coder, "edit_format") or coder.edit_format not in ("agent", "subagent"): return "Error: Skill removal is only available in agent mode." # Check if skills_manager is available From 7f8296edd2e4aa46eab14e3b70e6a690a2d7552e Mon Sep 17 00:00:00 2001 From: Your Name Date: Sun, 31 May 2026 15:58:50 -0400 Subject: [PATCH 26/37] Switch to newly spawned agent on creation --- cecli/commands/spawn_agent.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/cecli/commands/spawn_agent.py b/cecli/commands/spawn_agent.py index de77578713f..6b1c185d17d 100644 --- a/cecli/commands/spawn_agent.py +++ b/cecli/commands/spawn_agent.py @@ -27,11 +27,21 @@ async def execute(cls, io, coder, args, **kwargs): try: agent_service = AgentService.get_instance(coder) - await agent_service.spawn(name, prompt, parent=coder, auto_reap=False) + new_coder, info = await agent_service.spawn(name, prompt, parent=coder, auto_reap=False) + + # Set the newly spawned agent as the foreground agent + agent_service.foreground_uuid = info.coder.uuid if coder.tui and coder.tui(): - switch_key = coder.tui().get_keys_for("next_agent") - io.tool_output(f"Sub-agent '{name}' spawned. " f"Switch to it with {switch_key}") + tui = coder.tui() + switch_key = tui.get_keys_for("next_agent") + io.tool_output(f"Sub-agent '{name}' spawned and active. 
Switch with {switch_key}") + + # Switch TUI display to the new sub-agent's container + try: + tui.call_from_thread(tui._switch_to_container, info.coder.uuid) + except Exception: + pass except ValueError as e: io.tool_error(f"Error: {e}") except RuntimeError as e: From d84adf66d0c6d583ed15bef9b5be8acd4a237306 Mon Sep 17 00:00:00 2001 From: Your Name Date: Sun, 31 May 2026 17:51:34 -0400 Subject: [PATCH 27/37] Update model-metadata disable default litellm metadata fetch on start up, bump version --- cecli/__init__.py | 2 +- cecli/main.py | 2 +- cecli/resources/model-metadata.json | 1801 ++++++++++++++++++++++----- 3 files changed, 1521 insertions(+), 284 deletions(-) diff --git a/cecli/__init__.py b/cecli/__init__.py index 32ad82fb67b..6a368cff4c6 100644 --- a/cecli/__init__.py +++ b/cecli/__init__.py @@ -1,6 +1,6 @@ from packaging import version -__version__ = "0.99.12.dev" +__version__ = "0.100.2.dev" safe_version = __version__ try: diff --git a/cecli/main.py b/cecli/main.py index 69892725e38..17b96a8f8af 100644 --- a/cecli/main.py +++ b/cecli/main.py @@ -584,7 +584,7 @@ async def main_async(argv=None, input=None, output=None, force_git_root=None, re if not args.verify_ssl: import httpx - os.environ["SSL_VERIFY"] = "" + os.environ["LITELLM_LOCAL_MODEL_COST"] = "true" litellm._load_litellm() litellm._lazy_module.client_session = httpx.Client(verify=False) litellm._lazy_module.aclient_session = httpx.AsyncClient(verify=False) diff --git a/cecli/resources/model-metadata.json b/cecli/resources/model-metadata.json index 2413e5407f4..8aee168a3e5 100644 --- a/cecli/resources/model-metadata.json +++ b/cecli/resources/model-metadata.json @@ -374,7 +374,6 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346, "supports_native_structured_output": true }, "anthropic.claude-haiku-4-5@20251001": { @@ -398,7 +397,6 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": 
true, - "tool_use_system_prompt_tokens": 346, "supports_native_streaming": true, "supports_native_structured_output": true }, @@ -424,8 +422,8 @@ "supports_vision": true, "supports_prompt_caching": false, "supports_reasoning": true, - "supports_minimal_reasoning_effort": true, - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_output_config": true }, "anthropic.claude-opus-4-1-20250805-v1:0": { "cache_creation_input_token_cost": 0.00001875, @@ -450,8 +448,7 @@ "supports_reasoning": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true, - "tool_use_system_prompt_tokens": 159 + "supports_vision": true }, "anthropic.claude-opus-4-20250514-v1:0": { "cache_creation_input_token_cost": 0.00001875, @@ -476,8 +473,7 @@ "supports_reasoning": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true, - "tool_use_system_prompt_tokens": 159 + "supports_vision": true }, "anthropic.claude-opus-4-5-20251101-v1:0": { "cache_creation_input_token_cost": 0.00000625, @@ -501,12 +497,12 @@ "supports_pdf_input": true, "supports_prompt_caching": true, "supports_reasoning": true, - "supports_minimal_reasoning_effort": true, "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 159, - "supports_native_structured_output": true + "supports_native_structured_output": true, + "supports_output_config": true, + "bedrock_output_config_effort_ceiling": "high" }, "anthropic.claude-opus-4-6-v1": { "cache_creation_input_token_cost": 0.00000625, @@ -533,10 +529,10 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346, "supports_native_structured_output": true, + "supports_output_config": true, "supports_max_reasoning_effort": true, - "supports_minimal_reasoning_effort": true + "bedrock_output_config_effort_ceiling": "max" }, "anthropic.claude-opus-4-7": { 
"cache_creation_input_token_cost": 0.00000625, @@ -564,10 +560,41 @@ "supports_tool_choice": true, "supports_vision": true, "supports_xhigh_reasoning_effort": true, - "tool_use_system_prompt_tokens": 346, "supports_native_structured_output": true, "supports_max_reasoning_effort": true, - "supports_minimal_reasoning_effort": true + "supports_output_config": true, + "bedrock_output_config_effort_ceiling": "xhigh" + }, + "anthropic.claude-opus-4-8": { + "cache_creation_input_token_cost": 0.00000625, + "cache_creation_input_token_cost_above_1hr": 0.00001, + "cache_read_input_token_cost": 5e-7, + "input_cost_per_token": 0.000005, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 0.000025, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_xhigh_reasoning_effort": true, + "supports_native_structured_output": true, + "supports_max_reasoning_effort": true, + "supports_output_config": true, + "bedrock_output_config_effort_ceiling": "xhigh" }, "anthropic.claude-sonnet-4-20250514-v1:0": { "cache_creation_input_token_cost": 0.00000375, @@ -596,8 +623,7 @@ "supports_reasoning": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true, - "tool_use_system_prompt_tokens": 159 + "supports_vision": true }, "anthropic.claude-sonnet-4-5-20250929-v1:0": { "cache_creation_input_token_cost": 0.00000375, @@ -629,7 +655,6 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - 
"tool_use_system_prompt_tokens": 159, "supports_native_structured_output": true }, "anthropic.claude-sonnet-4-6": { @@ -658,9 +683,8 @@ "supports_max_reasoning_effort": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346, "supports_native_structured_output": true, - "supports_minimal_reasoning_effort": true + "supports_output_config": true }, "anthropic.claude-v1": { "input_cost_per_token": 0.000008, @@ -962,7 +986,6 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346, "supports_native_structured_output": true }, "apac.anthropic.claude-sonnet-4-20250514-v1:0": { @@ -992,8 +1015,7 @@ "supports_reasoning": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true, - "tool_use_system_prompt_tokens": 159 + "supports_vision": true }, "au.anthropic.claude-haiku-4-5-20251001-v1:0": { "cache_creation_input_token_cost": 0.000001375, @@ -1014,7 +1036,6 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346, "supports_native_structured_output": true }, "au.anthropic.claude-opus-4-6-v1": { @@ -1041,10 +1062,10 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346, "supports_native_structured_output": true, + "supports_output_config": true, "supports_max_reasoning_effort": true, - "supports_minimal_reasoning_effort": true + "bedrock_output_config_effort_ceiling": "max" }, "au.anthropic.claude-opus-4-7": { "cache_creation_input_token_cost": 0.000006875, @@ -1071,10 +1092,41 @@ "supports_tool_choice": true, "supports_vision": true, "supports_xhigh_reasoning_effort": true, - "tool_use_system_prompt_tokens": 346, "supports_native_structured_output": true, "supports_max_reasoning_effort": true, - "supports_minimal_reasoning_effort": true + "supports_output_config": true, 
+ "bedrock_output_config_effort_ceiling": "xhigh" + }, + "au.anthropic.claude-opus-4-8": { + "cache_creation_input_token_cost": 0.000006875, + "cache_creation_input_token_cost_above_1hr": 0.000011, + "cache_read_input_token_cost": 5.5e-7, + "input_cost_per_token": 0.0000055, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 0.0000275, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_xhigh_reasoning_effort": true, + "supports_native_structured_output": true, + "supports_max_reasoning_effort": true, + "supports_output_config": true, + "bedrock_output_config_effort_ceiling": "xhigh" }, "au.anthropic.claude-sonnet-4-5-20250929-v1:0": { "cache_creation_input_token_cost": 0.000004125, @@ -1104,7 +1156,6 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346, "supports_native_structured_output": true }, "au.anthropic.claude-sonnet-4-6": { @@ -1132,9 +1183,8 @@ "supports_max_reasoning_effort": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346, "supports_native_structured_output": true, - "supports_minimal_reasoning_effort": true + "supports_output_config": true }, "azure/command-r-plus": { "input_cost_per_token": 0.000003, @@ -4540,10 +4590,10 @@ "supports_pdf_input": true, "supports_prompt_caching": true, "supports_reasoning": true, - "supports_minimal_reasoning_effort": true, "supports_response_schema": true, 
"supports_tool_choice": true, - "supports_vision": true + "supports_vision": true, + "supports_output_config": true }, "azure_ai/claude-opus-4-6": { "input_cost_per_token": 0.000005, @@ -4570,9 +4620,8 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 159, - "supports_max_reasoning_effort": true, - "supports_minimal_reasoning_effort": true + "supports_output_config": true, + "supports_max_reasoning_effort": true }, "azure_ai/claude-opus-4-7": { "input_cost_per_token": 0.000005, @@ -4600,9 +4649,35 @@ "supports_tool_choice": true, "supports_vision": true, "supports_xhigh_reasoning_effort": true, - "tool_use_system_prompt_tokens": 159, - "supports_max_reasoning_effort": true, - "supports_minimal_reasoning_effort": true + "supports_max_reasoning_effort": true + }, + "azure_ai/claude-opus-4-8": { + "input_cost_per_token": 0.000005, + "output_cost_per_token": 0.000025, + "litellm_provider": "azure_ai", + "max_input_tokens": 200000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "cache_creation_input_token_cost": 0.00000625, + "cache_creation_input_token_cost_above_1hr": 0.00001, + "cache_read_input_token_cost": 5e-7, + "supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_xhigh_reasoning_effort": true, + "supports_max_reasoning_effort": true }, "azure_ai/claude-sonnet-4-5": { "cache_creation_input_token_cost": 0.00000375, @@ -4646,8 +4721,7 @@ "supports_max_reasoning_effort": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 
346, - "supports_minimal_reasoning_effort": true + "supports_output_config": true }, "azure_ai/deepseek-r1": { "input_cost_per_token": 0.00000135, @@ -4743,57 +4817,339 @@ "supports_tool_choice": true, "supports_web_search": true }, - "azure_ai/gpt-oss-120b": { - "input_cost_per_token": 1.5e-7, - "output_cost_per_token": 6e-7, + "azure_ai/gpt-5.4": { + "cache_read_input_token_cost": 2.5e-7, + "cache_read_input_token_cost_above_272k_tokens": 5e-7, + "cache_read_input_token_cost_priority": 5e-7, + "cache_read_input_token_cost_above_272k_tokens_priority": 0.000001, + "input_cost_per_token": 0.0000025, + "input_cost_per_token_above_272k_tokens": 0.000005, + "input_cost_per_token_priority": 0.000005, + "input_cost_per_token_above_272k_tokens_priority": 0.00001, "litellm_provider": "azure_ai", - "max_input_tokens": 131072, - "max_output_tokens": 131072, - "max_tokens": 131072, + "max_input_tokens": 1050000, + "max_output_tokens": 128000, + "max_tokens": 128000, "mode": "chat", - "source": "https://azure.microsoft.com/en-us/pricing/details/cognitive-services/openai-service/", + "output_cost_per_token": 0.000015, + "output_cost_per_token_above_272k_tokens": 0.0000225, + "output_cost_per_token_priority": 0.00003, + "output_cost_per_token_above_272k_tokens_priority": 0.000045, + "source": "https://ai.azure.com/catalog/models/gpt-5.4", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, + "supports_native_streaming": true, "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, "supports_response_schema": true, - "supports_tool_choice": true + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true, + "supports_web_search": true, + 
"supports_none_reasoning_effort": true, + "supports_xhigh_reasoning_effort": true, + "supports_minimal_reasoning_effort": true }, - "azure_ai/grok-3": { - "input_cost_per_token": 0.000003, + "azure_ai/gpt-5.4-2026-03-05": { + "cache_read_input_token_cost": 2.5e-7, + "cache_read_input_token_cost_above_272k_tokens": 5e-7, + "cache_read_input_token_cost_priority": 5e-7, + "cache_read_input_token_cost_above_272k_tokens_priority": 0.000001, + "input_cost_per_token": 0.0000025, + "input_cost_per_token_above_272k_tokens": 0.000005, + "input_cost_per_token_priority": 0.000005, + "input_cost_per_token_above_272k_tokens_priority": 0.00001, "litellm_provider": "azure_ai", - "max_input_tokens": 131072, - "max_output_tokens": 131072, - "max_tokens": 131072, + "max_input_tokens": 1050000, + "max_output_tokens": 128000, + "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 0.000015, - "source": "https://azure.microsoft.com/en-us/pricing/details/ai-foundry-models/grok/", + "output_cost_per_token_above_272k_tokens": 0.0000225, + "output_cost_per_token_priority": 0.00003, + "output_cost_per_token_above_272k_tokens_priority": 0.000045, + "source": "https://ai.azure.com/catalog/models/gpt-5.4", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, - "supports_response_schema": false, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, "supports_tool_choice": true, - "supports_web_search": true + "supports_service_tier": true, + "supports_vision": true, + "supports_web_search": true, + "supports_none_reasoning_effort": true, + "supports_xhigh_reasoning_effort": true, + "supports_minimal_reasoning_effort": true 
}, - "azure_ai/grok-3-mini": { - "input_cost_per_token": 2.5e-7, + "azure_ai/gpt-5.4-mini": { + "cache_read_input_token_cost": 7.5e-8, + "cache_read_input_token_cost_above_272k_tokens": 1.5e-7, + "cache_read_input_token_cost_priority": 1.5e-7, + "cache_read_input_token_cost_above_272k_tokens_priority": 3e-7, + "input_cost_per_token": 7.5e-7, + "input_cost_per_token_above_272k_tokens": 0.0000015, + "input_cost_per_token_priority": 0.0000015, + "input_cost_per_token_above_272k_tokens_priority": 0.000003, "litellm_provider": "azure_ai", - "max_input_tokens": 131072, - "max_output_tokens": 131072, - "max_tokens": 131072, + "max_input_tokens": 400000, + "max_output_tokens": 128000, + "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 0.00000127, - "source": "https://azure.microsoft.com/en-us/pricing/details/ai-foundry-models/grok/", + "output_cost_per_token": 0.0000045, + "output_cost_per_token_above_272k_tokens": 0.00000675, + "output_cost_per_token_priority": 0.000009, + "output_cost_per_token_above_272k_tokens_priority": 0.0000135, + "source": "https://ai.azure.com/catalog/models/gpt-5.4-mini", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, "supports_reasoning": true, - "supports_response_schema": false, + "supports_response_schema": true, + "supports_system_messages": true, "supports_tool_choice": true, - "supports_web_search": true + "supports_service_tier": true, + "supports_vision": true, + "supports_web_search": true, + "supports_none_reasoning_effort": true, + "supports_xhigh_reasoning_effort": true, + "supports_minimal_reasoning_effort": false }, - "azure_ai/grok-4": { - "input_cost_per_token": 0.000003, + 
"azure_ai/gpt-5.4-mini-2026-03-17": { + "cache_read_input_token_cost": 7.5e-8, + "cache_read_input_token_cost_above_272k_tokens": 1.5e-7, + "cache_read_input_token_cost_priority": 1.5e-7, + "cache_read_input_token_cost_above_272k_tokens_priority": 3e-7, + "input_cost_per_token": 7.5e-7, + "input_cost_per_token_above_272k_tokens": 0.0000015, + "input_cost_per_token_priority": 0.0000015, + "input_cost_per_token_above_272k_tokens_priority": 0.000003, "litellm_provider": "azure_ai", - "max_input_tokens": 131072, - "max_output_tokens": 131072, - "max_tokens": 131072, + "max_input_tokens": 400000, + "max_output_tokens": 128000, + "max_tokens": 128000, "mode": "chat", - "output_cost_per_token": 0.000015, + "output_cost_per_token": 0.0000045, + "output_cost_per_token_above_272k_tokens": 0.00000675, + "output_cost_per_token_priority": 0.000009, + "output_cost_per_token_above_272k_tokens_priority": 0.0000135, + "source": "https://ai.azure.com/catalog/models/gpt-5.4-mini", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true, + "supports_web_search": true, + "supports_none_reasoning_effort": true, + "supports_xhigh_reasoning_effort": true, + "supports_minimal_reasoning_effort": false + }, + "azure_ai/gpt-5.4-nano": { + "cache_read_input_token_cost": 2e-8, + "cache_read_input_token_cost_above_272k_tokens": 4e-8, + "cache_read_input_token_cost_priority": 4e-8, + "cache_read_input_token_cost_above_272k_tokens_priority": 8e-8, + "input_cost_per_token": 2e-7, + 
"input_cost_per_token_above_272k_tokens": 4e-7, + "input_cost_per_token_priority": 4e-7, + "input_cost_per_token_above_272k_tokens_priority": 8e-7, + "litellm_provider": "azure_ai", + "max_input_tokens": 400000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 0.00000125, + "output_cost_per_token_above_272k_tokens": 0.000001875, + "output_cost_per_token_priority": 0.0000025, + "output_cost_per_token_above_272k_tokens_priority": 0.00000375, + "source": "https://ai.azure.com/catalog/models/gpt-5.4-nano", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true, + "supports_web_search": true, + "supports_none_reasoning_effort": true, + "supports_xhigh_reasoning_effort": true, + "supports_minimal_reasoning_effort": false + }, + "azure_ai/gpt-5.4-nano-2026-03-17": { + "cache_read_input_token_cost": 2e-8, + "cache_read_input_token_cost_above_272k_tokens": 4e-8, + "cache_read_input_token_cost_priority": 4e-8, + "cache_read_input_token_cost_above_272k_tokens_priority": 8e-8, + "input_cost_per_token": 2e-7, + "input_cost_per_token_above_272k_tokens": 4e-7, + "input_cost_per_token_priority": 4e-7, + "input_cost_per_token_above_272k_tokens_priority": 8e-7, + "litellm_provider": "azure_ai", + "max_input_tokens": 400000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 0.00000125, + "output_cost_per_token_above_272k_tokens": 0.000001875, + "output_cost_per_token_priority": 
0.0000025, + "output_cost_per_token_above_272k_tokens_priority": 0.00000375, + "source": "https://ai.azure.com/catalog/models/gpt-5.4-nano", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_service_tier": true, + "supports_vision": true, + "supports_web_search": true, + "supports_none_reasoning_effort": true, + "supports_xhigh_reasoning_effort": true, + "supports_minimal_reasoning_effort": false + }, + "azure_ai/gpt-oss-120b": { + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 6e-7, + "litellm_provider": "azure_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "source": "https://azure.microsoft.com/en-us/pricing/details/cognitive-services/openai-service/", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "azure_ai/grok-3": { + "input_cost_per_token": 0.000003, + "litellm_provider": "azure_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 0.000015, + "source": "https://azure.microsoft.com/en-us/pricing/details/ai-foundry-models/grok/", + "supports_function_calling": true, + "supports_response_schema": false, + "supports_tool_choice": true, + "supports_web_search": true + }, + "azure_ai/grok-3-mini": { + "input_cost_per_token": 2.5e-7, + "litellm_provider": "azure_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + 
"max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 0.00000127, + "source": "https://azure.microsoft.com/en-us/pricing/details/ai-foundry-models/grok/", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": false, + "supports_tool_choice": true, + "supports_web_search": true + }, + "azure_ai/grok-4": { + "input_cost_per_token": 0.000003, + "litellm_provider": "azure_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 0.000015, "source": "https://azure.microsoft.com/en-us/pricing/details/ai-foundry-models/grok/", "supports_function_calling": true, "supports_response_schema": true, @@ -6390,7 +6746,6 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346, "supports_native_structured_output": true, "supports_pdf_input": true }, @@ -6568,7 +6923,6 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346, "supports_native_structured_output": true, "supports_pdf_input": true }, @@ -7131,8 +7485,7 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "supports_web_search": true, - "tool_use_system_prompt_tokens": 159 + "supports_web_search": true }, "claude-3-haiku-20240307": { "cache_creation_input_token_cost": 3e-7, @@ -7150,8 +7503,7 @@ "supports_prompt_caching": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true, - "tool_use_system_prompt_tokens": 264 + "supports_vision": true }, "claude-3-opus-20240229": { "cache_creation_input_token_cost": 0.00001875, @@ -7170,8 +7522,7 @@ "supports_prompt_caching": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true, - "tool_use_system_prompt_tokens": 395 + "supports_vision": true }, "claude-4-opus-20250514": { 
"cache_creation_input_token_cost": 0.00001875, @@ -7196,8 +7547,7 @@ "supports_reasoning": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true, - "tool_use_system_prompt_tokens": 159 + "supports_vision": true }, "claude-4-sonnet-20250514": { "cache_creation_input_token_cost": 0.00000375, @@ -7227,8 +7577,7 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "supports_web_search": true, - "tool_use_system_prompt_tokens": 159 + "supports_web_search": true }, "claude-haiku-4-5": { "cache_creation_input_token_cost": 0.00000125, @@ -7296,8 +7645,7 @@ "supports_reasoning": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true, - "tool_use_system_prompt_tokens": 159 + "supports_vision": true }, "claude-opus-4-1-20250805": { "cache_creation_input_token_cost": 0.00001875, @@ -7324,8 +7672,7 @@ "supports_reasoning": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true, - "tool_use_system_prompt_tokens": 159 + "supports_vision": true }, "claude-opus-4-20250514": { "cache_creation_input_token_cost": 0.00001875, @@ -7352,8 +7699,7 @@ "supports_reasoning": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true, - "tool_use_system_prompt_tokens": 159 + "supports_vision": true }, "claude-opus-4-5": { "cache_creation_input_token_cost": 0.00000625, @@ -7377,11 +7723,10 @@ "supports_pdf_input": true, "supports_prompt_caching": true, "supports_reasoning": true, - "supports_minimal_reasoning_effort": true, "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 159 + "supports_output_config": true }, "claude-opus-4-5-20251101": { "cache_creation_input_token_cost": 0.00000625, @@ -7405,11 +7750,10 @@ "supports_pdf_input": true, "supports_prompt_caching": true, "supports_reasoning": true, - 
"supports_minimal_reasoning_effort": true, "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 159 + "supports_output_config": true }, "claude-opus-4-6": { "cache_creation_input_token_cost": 0.00000625, @@ -7437,13 +7781,12 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346, "provider_specific_entry": { "us": 1.1, "fast": 6 }, - "supports_max_reasoning_effort": true, - "supports_minimal_reasoning_effort": true + "supports_output_config": true, + "supports_max_reasoning_effort": true }, "claude-opus-4-6-20260205": { "cache_creation_input_token_cost": 0.00000625, @@ -7471,13 +7814,12 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346, "provider_specific_entry": { "us": 1.1, "fast": 6 }, "supports_max_reasoning_effort": true, - "supports_minimal_reasoning_effort": true + "supports_output_config": true }, "claude-opus-4-7": { "cache_creation_input_token_cost": 0.00000625, @@ -7507,12 +7849,11 @@ "supports_vision": true, "supports_xhigh_reasoning_effort": true, "supports_max_reasoning_effort": true, - "tool_use_system_prompt_tokens": 346, "provider_specific_entry": { "us": 1.1, "fast": 6 }, - "supports_minimal_reasoning_effort": true + "supports_output_config": true }, "claude-opus-4-7-20260416": { "cache_creation_input_token_cost": 0.00000625, @@ -7542,12 +7883,45 @@ "supports_vision": true, "supports_xhigh_reasoning_effort": true, "supports_max_reasoning_effort": true, - "tool_use_system_prompt_tokens": 346, "provider_specific_entry": { "us": 1.1, "fast": 6 }, - "supports_minimal_reasoning_effort": true + "supports_output_config": true + }, + "claude-opus-4-8": { + "cache_creation_input_token_cost": 0.00000625, + "cache_creation_input_token_cost_above_1hr": 0.00001, + "cache_read_input_token_cost": 5e-7, + "input_cost_per_token": 0.000005, 
+ "litellm_provider": "anthropic", + "max_input_tokens": 1000000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 0.000025, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_adaptive_thinking": true, + "supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_xhigh_reasoning_effort": true, + "supports_max_reasoning_effort": true, + "provider_specific_entry": { + "us": 1.1, + "fast": 2 + }, + "supports_output_config": true }, "claude-sonnet-4-20250514": { "deprecation_date": "2026-05-14", @@ -7578,8 +7952,7 @@ "supports_reasoning": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true, - "tool_use_system_prompt_tokens": 159 + "supports_vision": true }, "claude-sonnet-4-5": { "cache_creation_input_token_cost": 0.00000375, @@ -7608,8 +7981,7 @@ "supports_reasoning": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true, - "tool_use_system_prompt_tokens": 346 + "supports_vision": true }, "claude-sonnet-4-5-20250929": { "cache_creation_input_token_cost": 0.00000375, @@ -7639,8 +8011,7 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "supports_web_search": true, - "tool_use_system_prompt_tokens": 346 + "supports_web_search": true }, "claude-sonnet-4-5-20250929-v1:0": { "cache_creation_input_token_cost": 0.00000375, @@ -7664,8 +8035,7 @@ "supports_reasoning": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true, - "tool_use_system_prompt_tokens": 159 + "supports_vision": true }, 
"claude-sonnet-4-6": { "cache_creation_input_token_cost": 0.00000375, @@ -7693,8 +8063,7 @@ "supports_max_reasoning_effort": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346, - "supports_minimal_reasoning_effort": true + "supports_output_config": true }, "cloudflare/@cf/meta/llama-2-7b-chat-fp16": { "input_cost_per_token": 0.000001923, @@ -8841,8 +9210,8 @@ "supports_assistant_prefill": true, "supports_function_calling": true, "supports_reasoning": true, - "supports_minimal_reasoning_effort": true, - "supports_tool_choice": true + "supports_tool_choice": true, + "supports_output_config": true }, "databricks/databricks-claude-sonnet-4": { "input_cost_per_token": 0.0000029999900000000002, @@ -10359,7 +10728,6 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346, "supports_native_structured_output": true }, "eu.anthropic.claude-opus-4-1-20250805-v1:0": { @@ -10385,8 +10753,7 @@ "supports_reasoning": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true, - "tool_use_system_prompt_tokens": 159 + "supports_vision": true }, "eu.anthropic.claude-opus-4-20250514-v1:0": { "cache_creation_input_token_cost": 0.00001875, @@ -10411,8 +10778,7 @@ "supports_reasoning": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true, - "tool_use_system_prompt_tokens": 159 + "supports_vision": true }, "eu.anthropic.claude-opus-4-5-20251101-v1:0": { "cache_creation_input_token_cost": 0.00000625, @@ -10432,15 +10798,15 @@ "supports_assistant_prefill": true, "supports_computer_use": true, "supports_function_calling": true, - "supports_minimal_reasoning_effort": true, "supports_pdf_input": true, "supports_prompt_caching": true, "supports_reasoning": true, "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 159, - 
"supports_native_structured_output": true + "supports_native_structured_output": true, + "supports_output_config": true, + "bedrock_output_config_effort_ceiling": "high" }, "eu.anthropic.claude-opus-4-6-v1": { "cache_creation_input_token_cost": 0.000006875, @@ -10466,10 +10832,10 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346, "supports_native_structured_output": true, + "supports_output_config": true, "supports_max_reasoning_effort": true, - "supports_minimal_reasoning_effort": true + "bedrock_output_config_effort_ceiling": "max" }, "eu.anthropic.claude-opus-4-7": { "cache_creation_input_token_cost": 0.000006875, @@ -10496,10 +10862,41 @@ "supports_tool_choice": true, "supports_vision": true, "supports_xhigh_reasoning_effort": true, - "tool_use_system_prompt_tokens": 346, "supports_native_structured_output": true, "supports_max_reasoning_effort": true, - "supports_minimal_reasoning_effort": true + "supports_output_config": true, + "bedrock_output_config_effort_ceiling": "xhigh" + }, + "eu.anthropic.claude-opus-4-8": { + "cache_creation_input_token_cost": 0.000006875, + "cache_creation_input_token_cost_above_1hr": 0.000011, + "cache_read_input_token_cost": 5.5e-7, + "input_cost_per_token": 0.0000055, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 0.0000275, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_xhigh_reasoning_effort": true, + 
"supports_native_structured_output": true, + "supports_max_reasoning_effort": true, + "supports_output_config": true, + "bedrock_output_config_effort_ceiling": "xhigh" }, "eu.anthropic.claude-sonnet-4-20250514-v1:0": { "cache_creation_input_token_cost": 0.00000375, @@ -10528,8 +10925,7 @@ "supports_reasoning": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true, - "tool_use_system_prompt_tokens": 159 + "supports_vision": true }, "eu.anthropic.claude-sonnet-4-5-20250929-v1:0": { "cache_creation_input_token_cost": 0.000004125, @@ -10559,7 +10955,6 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346, "supports_native_structured_output": true }, "eu.anthropic.claude-sonnet-4-6": { @@ -10587,9 +10982,8 @@ "supports_max_reasoning_effort": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346, "supports_native_structured_output": true, - "supports_minimal_reasoning_effort": true + "supports_output_config": true }, "eu.deepseek.v3.2": { "input_cost_per_token": 7.4e-7, @@ -11381,7 +11775,22 @@ "supports_response_schema": true, "supports_tool_choice": true }, - "fireworks_ai/accounts/fireworks/models/gpt-oss-120b": { + "fireworks_ai/accounts/fireworks/models/glm-5p1": { + "cache_read_input_token_cost": 2.6e-7, + "input_cost_per_token": 0.0000014, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 202800, + "max_output_tokens": 202800, + "max_tokens": 202800, + "mode": "chat", + "output_cost_per_token": 0.0000044, + "source": "https://fireworks.ai/models/fireworks/glm-5p1", + "supports_function_calling": false, + "supports_reasoning": true, + "supports_response_schema": false, + "supports_tool_choice": false + }, + "fireworks_ai/accounts/fireworks/models/gpt-oss-120b": { "input_cost_per_token": 1.5e-7, "litellm_provider": "fireworks_ai", "max_input_tokens": 131072, @@ -12982,6 +13391,21 @@ 
"supports_response_schema": true, "supports_tool_choice": true }, + "fireworks_ai/glm-5p1": { + "cache_read_input_token_cost": 2.6e-7, + "input_cost_per_token": 0.0000014, + "litellm_provider": "fireworks_ai", + "max_input_tokens": 202800, + "max_output_tokens": 202800, + "max_tokens": 202800, + "mode": "chat", + "output_cost_per_token": 0.0000044, + "source": "https://fireworks.ai/models/fireworks/glm-5p1", + "supports_function_calling": false, + "supports_reasoning": true, + "supports_response_schema": false, + "supports_tool_choice": false + }, "fireworks_ai/kimi-k2p5": { "cache_read_input_token_cost": 1e-7, "input_cost_per_token": 6e-7, @@ -14086,6 +14510,73 @@ }, "web_search_billing_unit": "per_query" }, + "gemini-3.1-flash-lite": { + "cache_read_input_token_cost": 2.5e-8, + "cache_read_input_token_cost_batches": 1.25e-8, + "cache_read_input_token_cost_flex": 1.25e-8, + "cache_read_input_token_cost_per_audio_token": 5e-8, + "cache_read_input_token_cost_priority": 4.5e-8, + "input_cost_per_audio_token": 5e-7, + "input_cost_per_token": 2.5e-7, + "input_cost_per_token_batches": 1.25e-7, + "input_cost_per_token_flex": 1.25e-7, + "input_cost_per_token_priority": 4.5e-7, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65536, + "max_pdf_size_mb": 30, + "max_tokens": 65536, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_reasoning_token": 0.0000015, + "output_cost_per_token": 0.0000015, + "output_cost_per_token_batches": 7.5e-7, + "output_cost_per_token_flex": 7.5e-7, + "output_cost_per_token_priority": 0.0000027, + "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-3.1-flash-lite", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + 
"supported_output_modalities": [ + "text" + ], + "supports_audio_input": true, + "supports_audio_output": false, + "supports_code_execution": true, + "supports_file_search": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_video_input": true, + "supports_vision": true, + "supports_web_search": true, + "supports_native_streaming": true, + "search_context_cost_per_query": { + "search_context_size_low": 0.014, + "search_context_size_medium": 0.014, + "search_context_size_high": 0.014 + }, + "web_search_billing_unit": "per_query", + "supports_service_tier": true + }, "gemini-3.1-flash-lite-preview": { "cache_read_input_token_cost": 2.5e-8, "cache_read_input_token_cost_per_audio_token": 5e-8, @@ -14297,6 +14788,65 @@ }, "web_search_billing_unit": "per_query" }, + "gemini-3.5-flash": { + "cache_read_input_token_cost": 1.5e-7, + "input_cost_per_audio_token": 0.000001, + "input_cost_per_token": 0.0000015, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_reasoning_token": 0.000009, + "output_cost_per_token": 0.000009, + "source": "https://ai.google.dev/pricing/gemini-3", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_output": false, + "supports_audio_input": true, + "supports_function_calling": true, + 
"supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_video_input": true, + "supports_vision": true, + "supports_web_search": true, + "supports_native_streaming": true, + "input_cost_per_token_priority": 0.0000027, + "input_cost_per_audio_token_priority": 0.0000018, + "output_cost_per_token_priority": 0.0000162, + "cache_read_input_token_cost_priority": 2.7e-7, + "supports_service_tier": true, + "search_context_cost_per_query": { + "search_context_size_low": 0.014, + "search_context_size_medium": 0.014, + "search_context_size_high": 0.014 + }, + "web_search_billing_unit": "per_query" + }, "gemini-exp-1206": { "cache_read_input_token_cost": 3e-8, "input_cost_per_audio_token": 0.000001, @@ -15488,6 +16038,75 @@ }, "web_search_billing_unit": "per_query" }, + "gemini/gemini-3.1-flash-lite": { + "cache_read_input_token_cost": 2.5e-8, + "cache_read_input_token_cost_batches": 1.25e-8, + "cache_read_input_token_cost_flex": 1.25e-8, + "cache_read_input_token_cost_per_audio_token": 5e-8, + "cache_read_input_token_cost_priority": 4.5e-8, + "input_cost_per_audio_token": 5e-7, + "input_cost_per_token": 2.5e-7, + "input_cost_per_token_batches": 1.25e-7, + "input_cost_per_token_flex": 1.25e-7, + "input_cost_per_token_priority": 4.5e-7, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65536, + "max_pdf_size_mb": 30, + "max_tokens": 65536, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_reasoning_token": 0.0000015, + "output_cost_per_token": 0.0000015, + "output_cost_per_token_batches": 7.5e-7, + "output_cost_per_token_flex": 7.5e-7, + "output_cost_per_token_priority": 0.0000027, 
+ "rpm": 15, + "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-3.1-flash-lite", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_input": true, + "supports_audio_output": false, + "supports_code_execution": true, + "supports_file_search": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_video_input": true, + "supports_vision": true, + "supports_web_search": true, + "supports_native_streaming": true, + "tpm": 250000, + "search_context_cost_per_query": { + "search_context_size_low": 0.014, + "search_context_size_medium": 0.014, + "search_context_size_high": 0.014 + }, + "web_search_billing_unit": "per_query", + "supports_service_tier": true + }, "gemini/gemini-3.1-flash-lite-preview": { "cache_read_input_token_cost": 2.5e-8, "cache_read_input_token_cost_per_audio_token": 5e-8, @@ -15710,43 +16329,104 @@ }, "web_search_billing_unit": "per_query" }, - "gemini/gemini-exp-1114": { - "input_cost_per_token": 0, - "input_cost_per_token_above_128k_tokens": 0, + "gemini/gemini-3.5-flash": { + "cache_read_input_token_cost": 1.5e-7, + "input_cost_per_audio_token": 0.000001, + "input_cost_per_token": 0.0000015, "litellm_provider": "gemini", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_images_per_prompt": 3000, "max_input_tokens": 1048576, - "max_output_tokens": 8192, + "max_output_tokens": 65535, "max_pdf_size_mb": 30, - "max_tokens": 8192, + "max_tokens": 65535, "max_video_length": 1, "max_videos_per_prompt": 10, - "metadata": { - "notes": "Rate limits not documented for 
gemini-exp-1114. Assuming same as gemini-1.5-pro.", - "supports_tool_choice": true - }, "mode": "chat", - "output_cost_per_token": 0, - "output_cost_per_token_above_128k_tokens": 0, - "rpm": 1000, - "source": "https://ai.google.dev/pricing", + "output_cost_per_reasoning_token": 0.000009, + "output_cost_per_token": 0.000009, + "rpm": 2000, + "source": "https://ai.google.dev/pricing/gemini-3", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_output": false, + "supports_audio_input": true, "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, "supports_response_schema": true, "supports_system_messages": true, "supports_tool_choice": true, + "supports_url_context": true, + "supports_video_input": true, "supports_vision": true, - "tpm": 4000000 - }, - "gemini/gemini-exp-1206": { - "input_cost_per_token": 0, - "input_cost_per_token_above_128k_tokens": 0, - "litellm_provider": "gemini", - "max_audio_length_hours": 8.4, - "max_audio_per_prompt": 1, - "max_images_per_prompt": 3000, - "max_input_tokens": 2097152, + "supports_web_search": true, + "supports_native_streaming": true, + "tpm": 800000, + "input_cost_per_token_priority": 0.0000027, + "input_cost_per_audio_token_priority": 0.0000018, + "output_cost_per_token_priority": 0.0000162, + "cache_read_input_token_cost_priority": 2.7e-7, + "supports_service_tier": true, + "search_context_cost_per_query": { + "search_context_size_low": 0.014, + "search_context_size_medium": 0.014, + "search_context_size_high": 0.014 + }, + "web_search_billing_unit": "per_query" + }, + "gemini/gemini-exp-1114": { + "input_cost_per_token": 0, + "input_cost_per_token_above_128k_tokens": 0, + "litellm_provider": "gemini", + 
"max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_pdf_size_mb": 30, + "max_tokens": 8192, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "metadata": { + "notes": "Rate limits not documented for gemini-exp-1114. Assuming same as gemini-1.5-pro.", + "supports_tool_choice": true + }, + "mode": "chat", + "output_cost_per_token": 0, + "output_cost_per_token_above_128k_tokens": 0, + "rpm": 1000, + "source": "https://ai.google.dev/pricing", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "tpm": 4000000 + }, + "gemini/gemini-exp-1206": { + "input_cost_per_token": 0, + "input_cost_per_token_above_128k_tokens": 0, + "litellm_provider": "gemini", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 2097152, "max_output_tokens": 8192, "max_pdf_size_mb": 30, "max_tokens": 8192, @@ -16160,7 +16840,7 @@ "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_vision": true, - "supports_minimal_reasoning_effort": true + "supports_output_config": true }, "github_copilot/claude-opus-4.6-fast": { "litellm_provider": "github_copilot", @@ -16460,7 +17140,6 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346, "supports_native_structured_output": true }, "global.anthropic.claude-opus-4-5-20251101-v1:0": { @@ -16482,15 +17161,15 @@ "supports_assistant_prefill": true, "supports_computer_use": true, "supports_function_calling": true, - "supports_minimal_reasoning_effort": true, "supports_pdf_input": true, "supports_prompt_caching": true, "supports_reasoning": true, "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - 
"tool_use_system_prompt_tokens": 159, - "supports_native_structured_output": true + "supports_native_structured_output": true, + "supports_output_config": true, + "bedrock_output_config_effort_ceiling": "high" }, "global.anthropic.claude-opus-4-6-v1": { "cache_creation_input_token_cost": 0.00000625, @@ -16517,10 +17196,10 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346, "supports_native_structured_output": true, + "supports_output_config": true, "supports_max_reasoning_effort": true, - "supports_minimal_reasoning_effort": true + "bedrock_output_config_effort_ceiling": "max" }, "global.anthropic.claude-opus-4-7": { "cache_creation_input_token_cost": 0.00000625, @@ -16548,10 +17227,41 @@ "supports_tool_choice": true, "supports_vision": true, "supports_xhigh_reasoning_effort": true, - "tool_use_system_prompt_tokens": 346, "supports_native_structured_output": true, "supports_max_reasoning_effort": true, - "supports_minimal_reasoning_effort": true + "supports_output_config": true, + "bedrock_output_config_effort_ceiling": "xhigh" + }, + "global.anthropic.claude-opus-4-8": { + "cache_creation_input_token_cost": 0.00000625, + "cache_creation_input_token_cost_above_1hr": 0.00001, + "cache_read_input_token_cost": 5e-7, + "input_cost_per_token": 0.000005, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 0.000025, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + 
"supports_xhigh_reasoning_effort": true, + "supports_native_structured_output": true, + "supports_max_reasoning_effort": true, + "supports_output_config": true, + "bedrock_output_config_effort_ceiling": "xhigh" }, "global.anthropic.claude-sonnet-4-20250514-v1:0": { "cache_creation_input_token_cost": 0.00000375, @@ -16580,8 +17290,7 @@ "supports_reasoning": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true, - "tool_use_system_prompt_tokens": 159 + "supports_vision": true }, "global.anthropic.claude-sonnet-4-5-20250929-v1:0": { "cache_creation_input_token_cost": 0.00000375, @@ -16613,7 +17322,6 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346, "supports_native_structured_output": true }, "global.anthropic.claude-sonnet-4-6": { @@ -16642,9 +17350,8 @@ "supports_max_reasoning_effort": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346, "supports_native_structured_output": true, - "supports_minimal_reasoning_effort": true + "supports_output_config": true }, "gmi/MiniMaxAI/MiniMax-M2.1": { "input_cost_per_token": 3e-7, @@ -16686,7 +17393,7 @@ "output_cost_per_token": 0.000025, "supports_function_calling": true, "supports_vision": true, - "supports_minimal_reasoning_effort": true + "supports_output_config": true }, "gmi/anthropic/claude-sonnet-4": { "input_cost_per_token": 0.000003, @@ -17039,6 +17746,8 @@ "output_cost_per_token": 0.000008, "output_cost_per_token_batches": 0.000004, "output_cost_per_token_priority": 0.000014, + "regional_processing_uplift_multiplier_eu": 1.1, + "regional_processing_uplift_multiplier_us": 1.1, "supported_endpoints": [ "/v1/chat/completions", "/v1/batch", @@ -17112,6 +17821,8 @@ "output_cost_per_token": 0.0000016, "output_cost_per_token_batches": 8e-7, "output_cost_per_token_priority": 0.0000028, + "regional_processing_uplift_multiplier_eu": 1.1, + 
"regional_processing_uplift_multiplier_us": 1.1, "supported_endpoints": [ "/v1/chat/completions", "/v1/batch", @@ -17185,6 +17896,8 @@ "output_cost_per_token": 4e-7, "output_cost_per_token_batches": 2e-7, "output_cost_per_token_priority": 8e-7, + "regional_processing_uplift_multiplier_eu": 1.1, + "regional_processing_uplift_multiplier_us": 1.1, "supported_endpoints": [ "/v1/chat/completions", "/v1/batch", @@ -17256,6 +17969,8 @@ "output_cost_per_token": 0.00001, "output_cost_per_token_batches": 0.000005, "output_cost_per_token_priority": 0.000017, + "regional_processing_uplift_multiplier_eu": 1.1, + "regional_processing_uplift_multiplier_us": 1.1, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, @@ -17297,6 +18012,8 @@ "mode": "chat", "output_cost_per_token": 0.00001, "output_cost_per_token_batches": 0.000005, + "regional_processing_uplift_multiplier_eu": 1.1, + "regional_processing_uplift_multiplier_us": 1.1, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, @@ -17318,6 +18035,8 @@ "mode": "chat", "output_cost_per_token": 0.00001, "output_cost_per_token_batches": 0.000005, + "regional_processing_uplift_multiplier_eu": 1.1, + "regional_processing_uplift_multiplier_us": 1.1, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, @@ -17393,6 +18112,8 @@ "output_cost_per_token": 6e-7, "output_cost_per_token_batches": 3e-7, "output_cost_per_token_priority": 0.000001, + "regional_processing_uplift_multiplier_eu": 1.1, + "regional_processing_uplift_multiplier_us": 1.1, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, @@ -17662,6 +18383,8 @@ "output_cost_per_token": 0.00001, "output_cost_per_token_flex": 0.000005, "output_cost_per_token_priority": 0.00002, + "regional_processing_uplift_multiplier_eu": 1.1, + "regional_processing_uplift_multiplier_us": 
1.1, "supported_endpoints": [ "/v1/chat/completions", "/v1/batch", @@ -17818,6 +18541,8 @@ "output_cost_per_token": 0.000002, "output_cost_per_token_flex": 0.000001, "output_cost_per_token_priority": 0.0000036, + "regional_processing_uplift_multiplier_eu": 1.1, + "regional_processing_uplift_multiplier_us": 1.1, "supported_endpoints": [ "/v1/chat/completions", "/v1/batch", @@ -17899,6 +18624,8 @@ "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, + "regional_processing_uplift_multiplier_eu": 1.1, + "regional_processing_uplift_multiplier_us": 1.1, "mode": "chat", "output_cost_per_token": 4e-7, "output_cost_per_token_flex": 2e-7, @@ -18930,6 +19657,38 @@ "supports_system_messages": true, "supports_tool_choice": true }, + "gpt-realtime-2": { + "cache_creation_input_audio_token_cost": 4e-7, + "cache_read_input_token_cost": 4e-7, + "input_cost_per_audio_token": 0.000032, + "input_cost_per_image": 0.000005, + "input_cost_per_token": 0.000004, + "litellm_provider": "openai", + "max_input_tokens": 32000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_audio_token": 0.000064, + "output_cost_per_token": 0.000016, + "supported_endpoints": [ + "/v1/realtime" + ], + "supported_modalities": [ + "text", + "image", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, "gpt-realtime-2025-08-28": { "cache_creation_input_audio_token_cost": 4e-7, "cache_read_input_token_cost": 4e-7, @@ -19763,7 +20522,6 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346, "supports_native_structured_output": true }, "jp.anthropic.claude-sonnet-4-5-20250929-v1:0": { @@ -19794,9 +20552,36 @@ "supports_response_schema": 
true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346, "supports_native_structured_output": true }, + "jp.anthropic.claude-sonnet-4-6": { + "cache_creation_input_token_cost": 0.000004125, + "cache_read_input_token_cost": 3.3e-7, + "input_cost_per_token": 0.0000033, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.0000165, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_max_reasoning_effort": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_native_structured_output": true, + "supports_output_config": true + }, "kimi-k2-thinking-251104": { "input_cost_per_token": 0, "litellm_provider": "volcengine", @@ -21123,6 +21908,21 @@ "supports_tool_choice": true, "supports_vision": true }, + "mistral/ministral-8b-2512": { + "input_cost_per_token": 1.5e-7, + "litellm_provider": "mistral", + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "max_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 1.5e-7, + "source": "https://mistral.ai/pricing", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, "mistral/mistral-large-2402": { "input_cost_per_token": 0.000004, "litellm_provider": "mistral", @@ -23690,7 +24490,21 @@ "output_cost_per_token": 0.00000156, "source": "https://www.oracle.com/cloud/ai/generative-ai/pricing/", "supports_function_calling": true, - "supports_response_schema": false + 
"supports_response_schema": false, + "supports_native_streaming": true + }, + "oci/cohere.command-a-reasoning": { + "input_cost_per_token": 0.00000156, + "litellm_provider": "oci", + "max_input_tokens": 256000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.00000156, + "source": "https://www.oracle.com/cloud/ai/generative-ai/pricing/", + "supports_function_calling": false, + "supports_response_schema": false, + "supports_native_streaming": true }, "oci/cohere.command-a-reasoning-08-2025": { "input_cost_per_token": 0.00000156, @@ -23716,6 +24530,20 @@ "supports_function_calling": false, "supports_response_schema": false }, + "oci/cohere.command-a-vision": { + "input_cost_per_token": 0.00000156, + "litellm_provider": "oci", + "max_input_tokens": 256000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 0.00000156, + "source": "https://www.oracle.com/cloud/ai/generative-ai/pricing/", + "supports_function_calling": true, + "supports_response_schema": false, + "supports_native_streaming": true, + "supports_vision": true + }, "oci/cohere.command-a-vision-07-2025": { "input_cost_per_token": 0.00000156, "litellm_provider": "oci", @@ -23739,7 +24567,8 @@ "output_cost_per_token": 0.00000156, "source": "https://www.oracle.com/cloud/ai/generative-ai/pricing/", "supports_function_calling": true, - "supports_response_schema": false + "supports_response_schema": false, + "supports_native_streaming": true }, "oci/cohere.command-plus-latest": { "input_cost_per_token": 0.00000156, @@ -23751,7 +24580,8 @@ "output_cost_per_token": 0.00000156, "source": "https://www.oracle.com/cloud/ai/generative-ai/pricing/", "supports_function_calling": true, - "supports_response_schema": false + "supports_response_schema": false, + "supports_native_streaming": true }, "oci/cohere.command-r-08-2024": { "input_cost_per_token": 1.5e-7, @@ -23788,7 +24618,8 @@ "source": 
"https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", "supports_function_calling": true, "supports_response_schema": true, - "supports_vision": true + "supports_vision": true, + "supports_native_streaming": true }, "oci/google.gemini-2.5-flash-lite": { "input_cost_per_token": 7.5e-8, @@ -23801,7 +24632,8 @@ "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", "supports_function_calling": true, "supports_response_schema": true, - "supports_vision": true + "supports_vision": true, + "supports_native_streaming": true }, "oci/google.gemini-2.5-pro": { "input_cost_per_token": 0.00000125, @@ -23814,7 +24646,8 @@ "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", "supports_function_calling": true, "supports_response_schema": true, - "supports_vision": true + "supports_vision": true, + "supports_native_streaming": true }, "oci/meta.llama-3.1-405b-instruct": { "input_cost_per_token": 0.00001068, @@ -23826,7 +24659,8 @@ "output_cost_per_token": 0.00001068, "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", "supports_function_calling": true, - "supports_response_schema": false + "supports_response_schema": false, + "supports_native_streaming": true }, "oci/meta.llama-3.1-70b-instruct": { "input_cost_per_token": 7.2e-7, @@ -23838,7 +24672,21 @@ "output_cost_per_token": 7.2e-7, "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", "supports_function_calling": true, - "supports_response_schema": false + "supports_response_schema": false, + "supports_native_streaming": true + }, + "oci/meta.llama-3.1-8b-instruct": { + "input_cost_per_token": 7.2e-7, + "litellm_provider": "oci", + "max_input_tokens": 128000, + "max_output_tokens": 4000, + "max_tokens": 4000, + "mode": "chat", + "output_cost_per_token": 7.2e-7, + "source": 
"https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", + "supports_function_calling": true, + "supports_response_schema": false, + "supports_native_streaming": true }, "oci/meta.llama-3.2-11b-vision-instruct": { "input_cost_per_token": 0.000002, @@ -23864,6 +24712,7 @@ "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", "supports_function_calling": true, "supports_response_schema": false, + "supports_native_streaming": true, "supports_vision": true }, "oci/meta.llama-3.3-70b-instruct": { @@ -23876,7 +24725,8 @@ "output_cost_per_token": 7.2e-7, "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", "supports_function_calling": true, - "supports_response_schema": false + "supports_response_schema": false, + "supports_native_streaming": true }, "oci/meta.llama-3.3-70b-instruct-fp8-dynamic": { "input_cost_per_token": 7.2e-7, @@ -23893,26 +24743,74 @@ "oci/meta.llama-4-maverick-17b-128e-instruct-fp8": { "input_cost_per_token": 7.2e-7, "litellm_provider": "oci", - "max_input_tokens": 512000, - "max_output_tokens": 4000, - "max_tokens": 4000, + "max_input_tokens": 1048576, + "max_output_tokens": 8192, + "max_tokens": 8192, "mode": "chat", "output_cost_per_token": 7.2e-7, "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", "supports_function_calling": true, - "supports_response_schema": false + "supports_response_schema": false, + "supports_native_streaming": true, + "supports_vision": true }, "oci/meta.llama-4-scout-17b-16e-instruct": { "input_cost_per_token": 7.2e-7, "litellm_provider": "oci", - "max_input_tokens": 192000, - "max_output_tokens": 4000, - "max_tokens": 4000, + "max_input_tokens": 10485760, + "max_output_tokens": 8192, + "max_tokens": 8192, "mode": "chat", "output_cost_per_token": 7.2e-7, "source": 
"https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", "supports_function_calling": true, - "supports_response_schema": false + "supports_response_schema": false, + "supports_native_streaming": true + }, + "oci/openai.gpt-5": { + "input_cost_per_token": 0.00000125, + "litellm_provider": "oci", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 0.00001, + "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_vision": true + }, + "oci/openai.gpt-5-mini": { + "input_cost_per_token": 2.5e-7, + "litellm_provider": "oci", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 0.000002, + "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_vision": true + }, + "oci/openai.gpt-5-nano": { + "input_cost_per_token": 5e-8, + "litellm_provider": "oci", + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 4e-7, + "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", + "supports_function_calling": true, + "supports_native_streaming": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_vision": true }, "oci/xai.grok-3": { "input_cost_per_token": 0.000003, @@ -23924,7 +24822,8 @@ "output_cost_per_token": 0.000015, "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", 
"supports_function_calling": true, - "supports_response_schema": false + "supports_response_schema": false, + "supports_native_streaming": true }, "oci/xai.grok-3-fast": { "input_cost_per_token": 0.000005, @@ -23936,7 +24835,8 @@ "output_cost_per_token": 0.000025, "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", "supports_function_calling": true, - "supports_response_schema": false + "supports_response_schema": false, + "supports_native_streaming": true }, "oci/xai.grok-3-mini": { "input_cost_per_token": 3e-7, @@ -23948,7 +24848,8 @@ "output_cost_per_token": 5e-7, "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", "supports_function_calling": true, - "supports_response_schema": false + "supports_response_schema": false, + "supports_native_streaming": true }, "oci/xai.grok-3-mini-fast": { "input_cost_per_token": 6e-7, @@ -23960,7 +24861,8 @@ "output_cost_per_token": 0.000004, "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", "supports_function_calling": true, - "supports_response_schema": false + "supports_response_schema": false, + "supports_native_streaming": true }, "oci/xai.grok-4": { "input_cost_per_token": 0.000003, @@ -23972,7 +24874,8 @@ "output_cost_per_token": 0.000015, "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing", "supports_function_calling": true, - "supports_response_schema": false + "supports_response_schema": false, + "supports_native_streaming": true }, "oci/xai.grok-4-fast": { "input_cost_per_token": 0.000005, @@ -24313,8 +25216,7 @@ "supports_computer_use": true, "supports_function_calling": true, "supports_tool_choice": true, - "supports_vision": true, - "tool_use_system_prompt_tokens": 159 + "supports_vision": true }, "openrouter/anthropic/claude-3.7-sonnet": { "input_cost_per_image": 0.0048, @@ -24330,8 +25232,7 @@ 
"supports_function_calling": true, "supports_reasoning": true, "supports_tool_choice": true, - "supports_vision": true, - "tool_use_system_prompt_tokens": 159 + "supports_vision": true }, "openrouter/anthropic/claude-haiku-4.5": { "cache_creation_input_token_cost": 0.00000125, @@ -24349,8 +25250,7 @@ "supports_prompt_caching": true, "supports_reasoning": true, "supports_tool_choice": true, - "supports_vision": true, - "tool_use_system_prompt_tokens": 346 + "supports_vision": true }, "openrouter/anthropic/claude-opus-4": { "input_cost_per_image": 0.0048, @@ -24369,8 +25269,7 @@ "supports_prompt_caching": true, "supports_reasoning": true, "supports_tool_choice": true, - "supports_vision": true, - "tool_use_system_prompt_tokens": 159 + "supports_vision": true }, "openrouter/anthropic/claude-opus-4.1": { "input_cost_per_image": 0.0048, @@ -24390,8 +25289,7 @@ "supports_prompt_caching": true, "supports_reasoning": true, "supports_tool_choice": true, - "supports_vision": true, - "tool_use_system_prompt_tokens": 159 + "supports_vision": true }, "openrouter/anthropic/claude-opus-4.5": { "cache_creation_input_token_cost": 0.00000625, @@ -24406,12 +25304,11 @@ "supports_assistant_prefill": true, "supports_computer_use": true, "supports_function_calling": true, - "supports_minimal_reasoning_effort": true, "supports_prompt_caching": true, "supports_reasoning": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 159 + "supports_output_config": true }, "openrouter/anthropic/claude-opus-4.6": { "cache_creation_input_token_cost": 0.00000625, @@ -24430,9 +25327,7 @@ "supports_reasoning": true, "supports_max_reasoning_effort": true, "supports_tool_choice": true, - "supports_vision": true, - "tool_use_system_prompt_tokens": 346, - "supports_minimal_reasoning_effort": true + "supports_vision": true }, "openrouter/anthropic/claude-opus-4.7": { "cache_creation_input_token_cost": 0.00000625, @@ -24454,8 +25349,7 @@ 
"supports_max_reasoning_effort": true, "supports_tool_choice": true, "supports_vision": true, - "supports_xhigh_reasoning_effort": true, - "tool_use_system_prompt_tokens": 346 + "supports_xhigh_reasoning_effort": true }, "openrouter/anthropic/claude-sonnet-4": { "input_cost_per_image": 0.0048, @@ -24478,8 +25372,7 @@ "supports_prompt_caching": true, "supports_reasoning": true, "supports_tool_choice": true, - "supports_vision": true, - "tool_use_system_prompt_tokens": 159 + "supports_vision": true }, "openrouter/anthropic/claude-sonnet-4.5": { "input_cost_per_image": 0.0048, @@ -24502,8 +25395,7 @@ "supports_prompt_caching": true, "supports_reasoning": true, "supports_tool_choice": true, - "supports_vision": true, - "tool_use_system_prompt_tokens": 159 + "supports_vision": true }, "openrouter/anthropic/claude-sonnet-4.6": { "cache_creation_input_token_cost": 0.00000375, @@ -24527,9 +25419,7 @@ "supports_reasoning": true, "supports_max_reasoning_effort": true, "supports_tool_choice": true, - "supports_vision": true, - "tool_use_system_prompt_tokens": 159, - "supports_minimal_reasoning_effort": true + "supports_vision": true }, "openrouter/bytedance/ui-tars-1.5-7b": { "input_cost_per_token": 1e-7, @@ -24800,6 +25690,58 @@ "supports_vision": true, "supports_web_search": true }, + "openrouter/google/gemini-3.1-flash-lite": { + "cache_read_input_token_cost": 2.5e-8, + "cache_read_input_token_cost_per_audio_token": 5e-8, + "input_cost_per_audio_token": 5e-7, + "input_cost_per_token": 2.5e-7, + "litellm_provider": "openrouter", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65536, + "max_pdf_size_mb": 30, + "max_tokens": 65536, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_reasoning_token": 0.0000015, + "output_cost_per_token": 0.0000015, + "rpm": 2000, + "source": 
"https://ai.google.dev/gemini-api/docs/pricing#gemini-3.1-flash-lite", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_input": true, + "supports_audio_output": false, + "supports_code_execution": true, + "supports_file_search": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_video_input": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 800000 + }, "openrouter/google/gemini-3.1-flash-lite-preview": { "cache_read_input_token_cost": 2.5e-8, "cache_read_input_token_cost_per_audio_token": 5e-8, @@ -25653,6 +26595,20 @@ "supports_tool_choice": true, "supports_vision": true }, + "openrouter/qwen/qwen3.6-plus": { + "input_cost_per_token": 3.25e-7, + "litellm_provider": "openrouter", + "max_input_tokens": 1000000, + "max_output_tokens": 65536, + "max_tokens": 65536, + "mode": "chat", + "output_cost_per_token": 0.00000195, + "source": "https://openrouter.ai/qwen/qwen3.6-plus", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": true + }, "openrouter/switchpoint/router": { "input_cost_per_token": 8.5e-7, "litellm_provider": "openrouter", @@ -25689,10 +26645,10 @@ "supports_web_search": true }, "openrouter/xiaomi/mimo-v2-flash": { - "input_cost_per_token": 9e-8, - "output_cost_per_token": 2.9e-7, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 3e-7, "cache_creation_input_token_cost": 0, - "cache_read_input_token_cost": 0, + "cache_read_input_token_cost": 1e-8, "litellm_provider": "openrouter", "max_input_tokens": 
262144, "max_output_tokens": 16384, @@ -25702,7 +26658,43 @@ "supports_tool_choice": true, "supports_reasoning": true, "supports_vision": false, - "supports_prompt_caching": false + "supports_prompt_caching": true + }, + "openrouter/xiaomi/mimo-v2.5": { + "input_cost_per_token": 4e-7, + "output_cost_per_token": 0.000002, + "cache_creation_input_token_cost": 0, + "cache_read_input_token_cost": 8e-8, + "litellm_provider": "openrouter", + "max_input_tokens": 1048576, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_reasoning": true, + "supports_vision": true, + "supports_audio_input": true, + "supports_video_input": true, + "supports_response_schema": true, + "supports_prompt_caching": true + }, + "openrouter/xiaomi/mimo-v2.5-pro": { + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000003, + "cache_creation_input_token_cost": 0, + "cache_read_input_token_cost": 2e-7, + "litellm_provider": "openrouter", + "max_input_tokens": 1048576, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_reasoning": true, + "supports_vision": false, + "supports_response_schema": true, + "supports_prompt_caching": true }, "openrouter/z-ai/glm-4.6": { "input_cost_per_token": 4e-7, @@ -27014,6 +28006,19 @@ "output_cost_per_token": 3e-7, "source": "https://cloud.sambanova.ai/plans/pricing" }, + "sambanova/MiniMax-M2.7": { + "input_cost_per_token": 3e-7, + "litellm_provider": "sambanova", + "max_input_tokens": 204800, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 0.0000012, + "source": "https://cloud.sambanova.ai/plans/pricing", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true + }, "sambanova/QwQ-32B": { "input_cost_per_token": 5e-7, "litellm_provider": "sambanova", @@ 
-27675,7 +28680,6 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346, "supports_native_structured_output": true }, "us.amazon.nova-2-lite-v1:0": { @@ -27908,7 +28912,6 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346, "supports_native_structured_output": true }, "us.anthropic.claude-opus-4-1-20250805-v1:0": { @@ -27934,8 +28937,7 @@ "supports_reasoning": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true, - "tool_use_system_prompt_tokens": 159 + "supports_vision": true }, "us.anthropic.claude-opus-4-20250514-v1:0": { "cache_creation_input_token_cost": 0.00001875, @@ -27946,7 +28948,33 @@ "max_output_tokens": 32000, "max_tokens": 32000, "mode": "chat", - "output_cost_per_token": 0.000075, + "output_cost_per_token": 0.000075, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "us.anthropic.claude-opus-4-5-20251101-v1:0": { + "cache_creation_input_token_cost": 0.000006875, + "cache_creation_input_token_cost_above_1hr": 0.000011, + "cache_read_input_token_cost": 5.5e-7, + "input_cost_per_token": 0.0000055, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 0.0000275, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -27961,17 +28989,19 @@ "supports_response_schema": true, "supports_tool_choice": 
true, "supports_vision": true, - "tool_use_system_prompt_tokens": 159 + "supports_native_structured_output": true, + "supports_output_config": true, + "bedrock_output_config_effort_ceiling": "high" }, - "us.anthropic.claude-opus-4-5-20251101-v1:0": { + "us.anthropic.claude-opus-4-6-v1": { "cache_creation_input_token_cost": 0.000006875, "cache_creation_input_token_cost_above_1hr": 0.000011, "cache_read_input_token_cost": 5.5e-7, "input_cost_per_token": 0.0000055, "litellm_provider": "bedrock_converse", - "max_input_tokens": 200000, - "max_output_tokens": 64000, - "max_tokens": 64000, + "max_input_tokens": 1000000, + "max_output_tokens": 128000, + "max_tokens": 128000, "mode": "chat", "output_cost_per_token": 0.0000275, "search_context_cost_per_query": { @@ -27979,20 +29009,21 @@ "search_context_size_low": 0.01, "search_context_size_medium": 0.01 }, - "supports_assistant_prefill": true, + "supports_assistant_prefill": false, "supports_computer_use": true, "supports_function_calling": true, - "supports_minimal_reasoning_effort": true, "supports_pdf_input": true, "supports_prompt_caching": true, "supports_reasoning": true, "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 159, - "supports_native_structured_output": true + "supports_native_structured_output": true, + "supports_output_config": true, + "supports_max_reasoning_effort": true, + "bedrock_output_config_effort_ceiling": "max" }, - "us.anthropic.claude-opus-4-6-v1": { + "us.anthropic.claude-opus-4-7": { "cache_creation_input_token_cost": 0.000006875, "cache_creation_input_token_cost_above_1hr": 0.000011, "cache_read_input_token_cost": 5.5e-7, @@ -28017,12 +29048,13 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346, + "supports_xhigh_reasoning_effort": true, "supports_native_structured_output": true, "supports_max_reasoning_effort": true, - 
"supports_minimal_reasoning_effort": true + "supports_output_config": true, + "bedrock_output_config_effort_ceiling": "xhigh" }, - "us.anthropic.claude-opus-4-7": { + "us.anthropic.claude-opus-4-8": { "cache_creation_input_token_cost": 0.000006875, "cache_creation_input_token_cost_above_1hr": 0.000011, "cache_read_input_token_cost": 5.5e-7, @@ -28048,10 +29080,10 @@ "supports_tool_choice": true, "supports_vision": true, "supports_xhigh_reasoning_effort": true, - "tool_use_system_prompt_tokens": 346, "supports_native_structured_output": true, "supports_max_reasoning_effort": true, - "supports_minimal_reasoning_effort": true + "supports_output_config": true, + "bedrock_output_config_effort_ceiling": "xhigh" }, "us.anthropic.claude-sonnet-4-20250514-v1:0": { "cache_creation_input_token_cost": 0.00000375, @@ -28080,8 +29112,7 @@ "supports_reasoning": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true, - "tool_use_system_prompt_tokens": 159 + "supports_vision": true }, "us.anthropic.claude-sonnet-4-5-20250929-v1:0": { "cache_creation_input_token_cost": 0.000004125, @@ -28113,7 +29144,6 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346, "supports_native_structured_output": true }, "us.anthropic.claude-sonnet-4-6": { @@ -28142,9 +29172,8 @@ "supports_max_reasoning_effort": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346, "supports_native_structured_output": true, - "supports_minimal_reasoning_effort": true + "supports_output_config": true }, "us.deepseek.r1-v1:0": { "input_cost_per_token": 0.00000135, @@ -28704,13 +29733,13 @@ "output_cost_per_token": 0.000025, "supports_assistant_prefill": true, "supports_computer_use": true, - "supports_minimal_reasoning_effort": true, "supports_function_calling": true, "supports_prompt_caching": true, "supports_reasoning": true, "supports_response_schema": 
true, "supports_tool_choice": true, - "supports_vision": true + "supports_vision": true, + "supports_output_config": true }, "vercel_ai_gateway/anthropic/claude-opus-4.6": { "cache_creation_input_token_cost": 0.00000625, @@ -28730,7 +29759,7 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "supports_minimal_reasoning_effort": true + "supports_output_config": true }, "vercel_ai_gateway/anthropic/claude-sonnet-4": { "cache_creation_input_token_cost": 0.00000375, @@ -29633,8 +30662,7 @@ "supports_reasoning": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true, - "tool_use_system_prompt_tokens": 159 + "supports_vision": true }, "vertex_ai/claude-3-haiku": { "input_cost_per_token": 2.5e-7, @@ -29779,8 +30807,7 @@ "supports_reasoning": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true, - "tool_use_system_prompt_tokens": 159 + "supports_vision": true }, "vertex_ai/claude-opus-4-1": { "cache_creation_input_token_cost": 0.00001875, @@ -29834,14 +30861,13 @@ "supports_assistant_prefill": true, "supports_computer_use": true, "supports_function_calling": true, - "supports_minimal_reasoning_effort": true, "supports_pdf_input": true, "supports_prompt_caching": true, "supports_reasoning": true, "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 159 + "supports_output_config": true }, "vertex_ai/claude-opus-4-5@20251101": { "cache_creation_input_token_cost": 0.00000625, @@ -29861,15 +30887,14 @@ "supports_assistant_prefill": true, "supports_computer_use": true, "supports_function_calling": true, - "supports_minimal_reasoning_effort": true, "supports_pdf_input": true, "supports_prompt_caching": true, "supports_reasoning": true, "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 159, - 
"supports_native_streaming": true + "supports_native_streaming": true, + "supports_output_config": true }, "vertex_ai/claude-opus-4-6": { "cache_creation_input_token_cost": 0.00000625, @@ -29895,9 +30920,8 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346, - "supports_max_reasoning_effort": true, - "supports_minimal_reasoning_effort": true + "supports_output_config": true, + "supports_max_reasoning_effort": true }, "vertex_ai/claude-opus-4-6@default": { "cache_creation_input_token_cost": 0.00000625, @@ -29923,9 +30947,8 @@ "supports_response_schema": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346, - "supports_max_reasoning_effort": true, - "supports_minimal_reasoning_effort": true + "supports_output_config": true, + "supports_max_reasoning_effort": true }, "vertex_ai/claude-opus-4-7": { "cache_creation_input_token_cost": 0.00000625, @@ -29952,9 +30975,7 @@ "supports_tool_choice": true, "supports_vision": true, "supports_xhigh_reasoning_effort": true, - "tool_use_system_prompt_tokens": 346, - "supports_max_reasoning_effort": true, - "supports_minimal_reasoning_effort": true + "supports_max_reasoning_effort": true }, "vertex_ai/claude-opus-4-7@default": { "cache_creation_input_token_cost": 0.00000625, @@ -29981,9 +31002,63 @@ "supports_tool_choice": true, "supports_vision": true, "supports_xhigh_reasoning_effort": true, - "tool_use_system_prompt_tokens": 346, - "supports_max_reasoning_effort": true, - "supports_minimal_reasoning_effort": true + "supports_max_reasoning_effort": true + }, + "vertex_ai/claude-opus-4-8": { + "cache_creation_input_token_cost": 0.00000625, + "cache_creation_input_token_cost_above_1hr": 0.00001, + "cache_read_input_token_cost": 5e-7, + "input_cost_per_token": 0.000005, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 1000000, + "max_output_tokens": 128000, + "max_tokens": 128000, + 
"mode": "chat", + "output_cost_per_token": 0.000025, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_xhigh_reasoning_effort": true, + "supports_max_reasoning_effort": true + }, + "vertex_ai/claude-opus-4-8@default": { + "cache_creation_input_token_cost": 0.00000625, + "cache_creation_input_token_cost_above_1hr": 0.00001, + "cache_read_input_token_cost": 5e-7, + "input_cost_per_token": 0.000005, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 1000000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 0.000025, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_xhigh_reasoning_effort": true, + "supports_max_reasoning_effort": true }, "vertex_ai/claude-opus-4@20250514": { "cache_creation_input_token_cost": 0.00001875, @@ -30008,8 +31083,7 @@ "supports_reasoning": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true, - "tool_use_system_prompt_tokens": 159 + "supports_vision": true }, "vertex_ai/claude-sonnet-4": { "cache_creation_input_token_cost": 0.00000375, @@ -30038,8 +31112,7 @@ "supports_reasoning": true, "supports_response_schema": true, 
"supports_tool_choice": true, - "supports_vision": true, - "tool_use_system_prompt_tokens": 159 + "supports_vision": true }, "vertex_ai/claude-sonnet-4-5": { "cache_creation_input_token_cost": 0.00000375, @@ -30114,13 +31187,12 @@ "supports_max_reasoning_effort": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, "search_context_size_medium": 0.01 }, - "supports_minimal_reasoning_effort": true + "supports_output_config": true }, "vertex_ai/claude-sonnet-4-6@default": { "cache_creation_input_token_cost": 0.00000375, @@ -30142,13 +31214,12 @@ "supports_max_reasoning_effort": true, "supports_tool_choice": true, "supports_vision": true, - "tool_use_system_prompt_tokens": 346, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, "search_context_size_medium": 0.01 }, - "supports_minimal_reasoning_effort": true + "supports_output_config": true }, "vertex_ai/claude-sonnet-4@20250514": { "cache_creation_input_token_cost": 0.00000375, @@ -30177,8 +31248,7 @@ "supports_reasoning": true, "supports_response_schema": true, "supports_tool_choice": true, - "supports_vision": true, - "tool_use_system_prompt_tokens": 159 + "supports_vision": true }, "vertex_ai/codestral-2": { "input_cost_per_token": 3e-7, @@ -30457,6 +31527,73 @@ }, "web_search_billing_unit": "per_query" }, + "vertex_ai/gemini-3.1-flash-lite": { + "cache_read_input_token_cost": 2.5e-8, + "cache_read_input_token_cost_batches": 1.25e-8, + "cache_read_input_token_cost_flex": 1.25e-8, + "cache_read_input_token_cost_per_audio_token": 5e-8, + "cache_read_input_token_cost_priority": 4.5e-8, + "input_cost_per_audio_token": 5e-7, + "input_cost_per_token": 2.5e-7, + "input_cost_per_token_batches": 1.25e-7, + "input_cost_per_token_flex": 1.25e-7, + "input_cost_per_token_priority": 4.5e-7, + "litellm_provider": 
"vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65536, + "max_pdf_size_mb": 30, + "max_tokens": 65536, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_reasoning_token": 0.0000015, + "output_cost_per_token": 0.0000015, + "output_cost_per_token_batches": 7.5e-7, + "output_cost_per_token_flex": 7.5e-7, + "output_cost_per_token_priority": 0.0000027, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#gemini-models", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_input": true, + "supports_audio_output": false, + "supports_code_execution": true, + "supports_file_search": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_video_input": true, + "supports_vision": true, + "supports_web_search": true, + "supports_native_streaming": true, + "search_context_cost_per_query": { + "search_context_size_low": 0.014, + "search_context_size_medium": 0.014, + "search_context_size_high": 0.014 + }, + "web_search_billing_unit": "per_query", + "supports_service_tier": true + }, "vertex_ai/gemini-3.1-flash-lite-preview": { "cache_read_input_token_cost": 2.5e-8, "cache_read_input_token_cost_per_audio_token": 5e-8, @@ -30642,6 +31779,64 @@ }, "web_search_billing_unit": "per_query" }, + "vertex_ai/gemini-3.5-flash": { + "cache_read_input_token_cost": 1.5e-7, + "input_cost_per_token": 0.0000015, + "input_cost_per_audio_token": 0.000001, + 
"litellm_provider": "vertex_ai", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 1048576, + "max_output_tokens": 65535, + "max_pdf_size_mb": 30, + "max_tokens": 65535, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "chat", + "output_cost_per_reasoning_token": 0.000009, + "output_cost_per_token": 0.000009, + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text" + ], + "supports_audio_input": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_video_input": true, + "supports_vision": true, + "supports_web_search": true, + "supports_native_streaming": true, + "input_cost_per_token_priority": 0.0000027, + "input_cost_per_audio_token_priority": 0.0000018, + "output_cost_per_token_priority": 0.0000162, + "cache_read_input_token_cost_priority": 2.7e-7, + "supports_service_tier": true, + "search_context_cost_per_query": { + "search_context_size_low": 0.014, + "search_context_size_medium": 0.014, + "search_context_size_high": 0.014 + }, + "web_search_billing_unit": "per_query" + }, "vertex_ai/jamba-1.5": { "input_cost_per_token": 2e-7, "litellm_provider": "vertex_ai-ai21_models", @@ -32414,6 +33609,48 @@ "supports_vision": true, "supports_web_search": true }, + "xai/grok-4.3": { + "cache_read_input_token_cost": 2e-7, + "cache_read_input_token_cost_above_200k_tokens": 4e-7, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_above_200k_tokens": 0.0000025, + "litellm_provider": 
"xai", + "max_input_tokens": 1000000, + "max_output_tokens": 1000000, + "max_tokens": 1000000, + "mode": "chat", + "output_cost_per_token": 0.0000025, + "output_cost_per_token_above_200k_tokens": 0.000005, + "source": "https://docs.x.ai/docs/models", + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "xai/grok-4.3-latest": { + "cache_read_input_token_cost": 2e-7, + "cache_read_input_token_cost_above_200k_tokens": 4e-7, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_above_200k_tokens": 0.0000025, + "litellm_provider": "xai", + "max_input_tokens": 1000000, + "max_output_tokens": 1000000, + "max_tokens": 1000000, + "mode": "chat", + "output_cost_per_token": 0.0000025, + "output_cost_per_token_above_200k_tokens": 0.000005, + "source": "https://docs.x.ai/docs/models", + "supports_function_calling": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, "xai/grok-beta": { "input_cost_per_token": 0.000005, "litellm_provider": "xai", From ea5e87b56f601f35539bd037279f2dcc6bcc0645 Mon Sep 17 00:00:00 2001 From: Your Name Date: Sun, 31 May 2026 14:54:27 -0700 Subject: [PATCH 28/37] feat: fix pipeline error in `cecli/io.py` by updating method signature to accept `**kwargs` Co-authored-by: cecli (openai/gemini_cli/gemini-2.5-pro) --- cecli/io.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cecli/io.py b/cecli/io.py index 923c795466d..59ebdeeec36 100644 --- a/cecli/io.py +++ b/cecli/io.py @@ -553,7 +553,7 @@ def _spinner_supports_unicode(self) -> bool: except Exception: return False - def start_spinner(self, text, update_last_text=True): + def start_spinner(self, text, update_last_text=True, **kwargs): """Start the 
spinner.""" self.stop_spinner() From d7264d31f6677f292c9de9d11d67ea2504c24fb2 Mon Sep 17 00:00:00 2001 From: Your Name Date: Sun, 31 May 2026 21:32:39 -0400 Subject: [PATCH 29/37] Only run reflection after observation completes, pass coder instance to summarizer so tool list can be fully reconstructed so it doesn't destroy cache in background --- cecli/coders/base_coder.py | 2 +- cecli/helpers/observations/service.py | 29 ++++++++++++++++++--------- cecli/history.py | 6 ++++-- cecli/models.py | 15 ++++++++++++++ cecli/prompts/base.yml | 13 +++++++----- 5 files changed, 47 insertions(+), 18 deletions(-) diff --git a/cecli/coders/base_coder.py b/cecli/coders/base_coder.py index b6dc6d6f69f..3bff272ed5f 100755 --- a/cecli/coders/base_coder.py +++ b/cecli/coders/base_coder.py @@ -1914,6 +1914,7 @@ async def summarize_and_update(messages, tag): messages, compaction_prompt, self.context_compaction_summary_tokens, + coder=self, ) if not text: raise ValueError(f"Summarization of {tag} messages returned empty.") @@ -3272,7 +3273,6 @@ async def send(self, messages, model=None, functions=None, tools=None): functions, self.stream, self.temperature, - # This could include any tools, but for now it is just MCP tools tools=tools, override_kwargs=self.model_kwargs.copy(), interrupt_event=self.interrupt_event, diff --git a/cecli/helpers/observations/service.py b/cecli/helpers/observations/service.py index 14cd255255e..5a2a6ee4a05 100644 --- a/cecli/helpers/observations/service.py +++ b/cecli/helpers/observations/service.py @@ -38,6 +38,7 @@ def __init__(self, coder): self.observation_threshold = max((coder.context_compaction_max_tokens or 0) / 3, 20000) self.reflection_threshold = self.observation_threshold * 2 self.is_processing = False + self.is_reflecting = False self._last_observed_index = 0 self.observations = [] # Internal storage @@ -70,13 +71,6 @@ async def check_and_trigger(self): asyncio.create_task(self.run_observation(unobserved)) self._last_observed_index = 
len(cur_messages) - obs_tokens = coder.summarizer.count_tokens( - [{"role": "user", "content": o} for o in self.observations] - ) - - if obs_tokens >= self.reflection_threshold: - asyncio.create_task(self.run_reflection()) - async def run_observation(self, messages): coder = self.get_coder() if coder is None: @@ -87,9 +81,17 @@ async def run_observation(self, messages): all_messages = ConversationService.get_manager(coder).get_messages_dict() prompt = coder.gpt_prompts.observation_prompt observation = await coder.summarizer.summarize_all_as_text( - all_messages, prompt, max_tokens=8192 + all_messages, prompt, max_tokens=8192, coder=coder ) self.observations.append(self.format_observation(observation)) + + obs_tokens = coder.summarizer.count_tokens( + [{"role": "user", "content": o} for o in self.observations] + ) + + if obs_tokens >= self.reflection_threshold: + await self.run_reflection() + except asyncio.CancelledError: raise except Exception as e: @@ -102,8 +104,14 @@ async def run_reflection(self): if coder is None: return - self.is_processing = True + if self.is_reflecting: + return + + self.is_reflecting = True try: + if not self.observations: + return + # Prepare observations for the reflector obs_text = "\n".join([f"- {o}" for o in self.observations]) @@ -113,6 +121,7 @@ async def run_reflection(self): [{"role": "user", "content": obs_text}], reflection_prompt, max_tokens=8192, + coder=coder, ) # 1. 
Internal State Update: Store the condensed log internally @@ -124,7 +133,7 @@ async def run_reflection(self): except Exception as e: coder.io.tool_error(f"Error during reflection: {e}") finally: - self.is_processing = False + self.is_reflecting = False def reset(self): self.observations = [] diff --git a/cecli/history.py b/cecli/history.py index 74b61d735c4..df38797f331 100644 --- a/cecli/history.py +++ b/cecli/history.py @@ -135,12 +135,14 @@ async def summarize_all(self, messages): print(err) raise ValueError(err) - async def summarize_all_as_text(self, messages, prompt, max_tokens=None): + async def summarize_all_as_text(self, messages, prompt, max_tokens=None, coder=None): messages.append(dict(role="user", content=prompt)) for model in self.models: try: - summary = await model.simple_send_with_retries(messages, max_tokens=max_tokens) + summary = await model.simple_send_with_retries( + messages, max_tokens=max_tokens, coder=coder + ) if summary is not None: return summary except Exception as e: diff --git a/cecli/models.py b/cecli/models.py index 4148682b44b..dc0d66899c2 100644 --- a/cecli/models.py +++ b/cecli/models.py @@ -1333,19 +1333,34 @@ async def simple_send_with_retries( messages, max_tokens=None, override_kwargs={}, + coder=None, ): from cecli.exceptions import LiteLLMExceptions litellm_ex = LiteLLMExceptions() retry_delay = 0.125 + temperature = None + tools = None + if self.verbose: dump(messages) + + if coder: + temperature = coder.temperature + tools = coder.get_tool_list() + merged_kwargs = coder.model_kwargs.copy() + merged_kwargs.update(override_kwargs) + override_kwargs = merged_kwargs + while True: try: + _hash, response = await self.send_completion( messages=messages, functions=None, stream=False, + temperature=temperature, + tools=tools, max_tokens=max_tokens, override_kwargs=override_kwargs, ) diff --git a/cecli/prompts/base.yml b/cecli/prompts/base.yml index 11c54ad8096..19c788990f0 100644 --- a/cecli/prompts/base.yml +++ 
b/cecli/prompts/base.yml @@ -86,8 +86,10 @@ go_ahead_tip: "" compaction_prompt: | --- # Instruction: Context Compaction & State Preservation - The current conversation is exceeding the context limit. Transform this history into a "Mission Intent" summary that allows a new LLM instance to resume with zero loss of technical momentum. - + The current conversation is exceeding the context limit. + Transform this history into a "Mission Intent" summary that allows a new session to resume with zero loss of technical momentum. + Do not call any tools in your response. Just generate your summary. + ## Required Output Format: ### 1. Core Objective @@ -111,12 +113,13 @@ observation_prompt: | Use a bulleted list of concise, factual statements. Document the sequence of **outcomes and milestones** reached. Do not describe tool syntax; describe what was learned or changed in one sentence per bullet. + Do not call any tools in your response. Just generate your summary. reflection_prompt: | --- - Condense this observation log into a consolidated set of - key historical facts. Additionally, explicitly state the - CURRENT GOALS and NEXT STEPS based on the history. + Condense this observation log into a consolidated set of key historical facts. + Additionally, explicitly state the CURRENT GOALS and NEXT STEPS based on the history. + Do not call any tools in your response. Just generate your summary. 
Format as: OBSERVATIONS: From 83d78f1be97485767ff627d73bedcc08f15348a0 Mon Sep 17 00:00:00 2001 From: Your Name Date: Sun, 31 May 2026 22:17:04 -0400 Subject: [PATCH 30/37] Fix observation tests --- tests/helpers/observations/test_observation_service.py | 8 ++------ tests/subagents/test_commands.py | 2 +- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/tests/helpers/observations/test_observation_service.py b/tests/helpers/observations/test_observation_service.py index 667e17ef77d..d51b8087da8 100644 --- a/tests/helpers/observations/test_observation_service.py +++ b/tests/helpers/observations/test_observation_service.py @@ -83,9 +83,7 @@ async def test_compact_context_with_observations(): # 2. check_and_trigger: count_tokens(observations) # 3. compact_context_if_needed: done_tokens # 4. compact_context_if_needed: cur_tokens - # 5. compact_context_if_needed: diff_tokens - # 6. summarize_and_update: count_tokens inside - coder.summarizer.count_tokens.side_effect = [100, 100, 100, 1000, 0, 50] + coder.summarizer.count_tokens.side_effect = [100, 100, 1000, 0, 50] coder.summarizer.summarize_all_as_text = AsyncMock(return_value="Summary Text") # Mock manager @@ -146,9 +144,7 @@ async def test_compact_context_with_observations_integration(): # 2. check_and_trigger: obs # 3. compact: done # 4. compact: cur - # 5. compact: diff - # 6. 
summarize_and_update: inner - coder.summarizer.count_tokens.side_effect = [100, 100, 100, 1000, 0, 50] + coder.summarizer.count_tokens.side_effect = [100, 100, 1000, 0, 50] coder.summarizer.summarize_all_as_text = AsyncMock(return_value="Summary Text") # Mock manager diff --git a/tests/subagents/test_commands.py b/tests/subagents/test_commands.py index 4cb340c1c84..3413cd44867 100644 --- a/tests/subagents/test_commands.py +++ b/tests/subagents/test_commands.py @@ -31,7 +31,7 @@ async def test_valid_name_calls_spawn(self): with patch("cecli.helpers.agents.service.AgentService") as MockSvc: mock_instance = MagicMock() - mock_instance.spawn = AsyncMock() + mock_instance.spawn = AsyncMock(return_value=(MagicMock(), MagicMock())) MockSvc.get_instance.return_value = mock_instance await SpawnAgentCommand.execute(io, coder, "reviewer") From 69fd020cd6d18717e69094e61a63541bfcb520c1 Mon Sep 17 00:00:00 2001 From: Your Name Date: Sun, 31 May 2026 22:19:15 -0400 Subject: [PATCH 31/37] Add quiet parameter from PR #536 --- cecli/sessions.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/cecli/sessions.py b/cecli/sessions.py index f1ee5a12570..2939431575c 100644 --- a/cecli/sessions.py +++ b/cecli/sessions.py @@ -88,7 +88,7 @@ def list_sessions(self) -> List[Dict]: return sessions - async def load_session(self, session_identifier: str, switch=True) -> bool: + async def load_session(self, session_identifier: str, switch=True, quiet: bool = False) -> bool: """Load a saved session by name or file path.""" if not session_identifier: self.io.tool_error("Please provide a session name or file path.") @@ -103,12 +103,14 @@ async def load_session(self, session_identifier: str, switch=True) -> bool: with open(session_file, "r", encoding="utf-8") as f: session_data = json.load(f) except Exception as e: - self.io.tool_error(f"Error loading session: {e}") + if not quiet: + self.io.tool_error(f"Error loading session: {e}") return False # Verify session format if not 
isinstance(session_data, dict) or "version" not in session_data: - self.io.tool_error("Invalid session format.") + if not quiet: + self.io.tool_error("Invalid session format.") return False # Apply session data From 8e2f52d123f5e2b9a275935761a9b7f5149a3a47 Mon Sep 17 00:00:00 2001 From: Your Name Date: Sun, 31 May 2026 22:42:38 -0400 Subject: [PATCH 32/37] Add session quiet test from PR #536 --- tests/basic/test_sessions.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/basic/test_sessions.py b/tests/basic/test_sessions.py index c6611e12909..4248c98b82b 100644 --- a/tests/basic/test_sessions.py +++ b/tests/basic/test_sessions.py @@ -57,6 +57,21 @@ def session_manager(mock_coder): return SessionManager(mock_coder, mock_coder.io) +@pytest.mark.asyncio +async def test_load_session_quiet_skips_tool_error_on_invalid_json( + session_manager, mock_coder, tmp_path +): + """BrightVision auto-load uses quiet=True when restore is best-effort.""" + session_dir = tmp_path / ".cecli" / "sessions" + os.makedirs(session_dir, exist_ok=True) + mock_coder.abs_root_path.side_effect = lambda x: str(tmp_path / x) + bad = session_dir / "bad.json" + bad.write_text("not json", encoding="utf-8") + + assert await session_manager.load_session(str(bad), switch=False, quiet=True) is False + mock_coder.io.tool_error.assert_not_called() + + def test_save_session(session_manager, mock_coder, tmp_path): """Test saving a session.""" session_dir = tmp_path / ".cecli" / "sessions" From 7aaf2085d9e397960297995a834a6f3a1513b948 Mon Sep 17 00:00:00 2001 From: Your Name Date: Sun, 31 May 2026 22:49:13 -0400 Subject: [PATCH 33/37] Repo map messaging adjustments from PR #536 --- cecli/repomap.py | 16 ++++++++++++---- tests/basic/test_repomap.py | 14 ++++++++++++++ 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/cecli/repomap.py b/cecli/repomap.py index 8c0f379d21c..1f721ef0ca3 100644 --- a/cecli/repomap.py +++ b/cecli/repomap.py @@ -746,6 +746,7 @@ def 
get_ranked_tags( num_fnames = len(fnames) fname_index = 0 + skipped_missing = 0 for fname in fnames: if self.verbose: self.io.tool_output(f"Processing {fname}") @@ -762,12 +763,14 @@ def get_ranked_tags( file_ok = False if not file_ok: + skipped_missing += 1 if fname not in self.warned_files: - self.io.tool_warning(f"Repo-map can't include {fname}") - self.io.tool_output( - "Has it been deleted from the file system but not from git?" - ) self.warned_files.add(fname) + if skipped_missing <= 2: + self.io.tool_warning( + f"Repo-map skipping missing file: {fname}" + " (removed on disk or not yet written)." + ) continue # dump(fname) @@ -843,6 +846,11 @@ def get_ranked_tags( if tag.specific_kind == "import": file_imports[rel_fname].add(tag.name) + if skipped_missing > 2: + self.io.tool_output( + f"Repo-map skipped {skipped_missing} paths that are not readable on disk." + ) + self.io.profile("Process Files") if self.use_enhanced_map and len(file_imports) > 0: diff --git a/tests/basic/test_repomap.py b/tests/basic/test_repomap.py index cae2c122ad0..53079604a04 100644 --- a/tests/basic/test_repomap.py +++ b/tests/basic/test_repomap.py @@ -50,6 +50,20 @@ def test_get_repo_map(self): # close the open cache files, so Windows won't error del repo_map + def test_repomap_resolves_relative_paths(self): + """Relative paths from git status must resolve against repo_root.""" + with IgnorantTemporaryDirectory() as temp_dir: + py_file = os.path.join(temp_dir, "module.py") + with open(py_file, "w", encoding="utf-8") as f: + f.write("def helper():\n return 1\n") + + io = InputOutput() + repo_map = RepoMap(main_model=self.GPT35, io=io, repo_root=temp_dir) + ranked = repo_map.get_ranked_tags([], ["module.py"], set(), set(), progress=False) + assert ranked is not None + assert len(ranked) > 0 + del repo_map + def test_repo_map_refresh_files(self): with GitTemporaryDirectory() as temp_dir: repo = git.Repo(temp_dir, odbt=git.GitCmdObjectDB) From 7ee2920feec3002a394c6f99f17aa0f8736355e3 Mon 
Sep 17 00:00:00 2001 From: Your Name Date: Sun, 31 May 2026 23:26:13 -0400 Subject: [PATCH 34/37] Add `--exempt-paths` to allow for the behavior in PR #532 but more generally --- cecli/args.py | 11 +++++++++++ cecli/commands/add.py | 14 ++++++++++++++ tests/basic/test_commands.py | 17 +++++++++++++++++ 3 files changed, 42 insertions(+) diff --git a/cecli/args.py b/cecli/args.py index 387f4764e78..f095f594e06 100644 --- a/cecli/args.py +++ b/cecli/args.py @@ -536,6 +536,17 @@ def get_parser(default_config_files, git_root): " False)" ), ) + group.add_argument( + "--exempt-paths", + action="append", + metavar="REGEX", + default=[], + help=( + "Specify a regex pattern for paths that should be exempted from file creation. " + "When /add matches a path matching any exempt pattern, it will not offer to " + "create the file. Can be used multiple times." + ), + ) ########## group = parser.add_argument_group("Output settings") group.add_argument( diff --git a/cecli/commands/add.py b/cecli/commands/add.py index 2889841af54..c4a4e31d15b 100644 --- a/cecli/commands/add.py +++ b/cecli/commands/add.py @@ -71,6 +71,20 @@ async def execute(cls, io, coder, args, **kwargs): if len(confirm_fname) > 64: confirm_fname = f".../{os.path.basename(confirm_fname)}" + # Check if the path matches any exempt-path regex patterns + exempt_paths = getattr(coder.args, "exempt_paths", None) or [] + if exempt_paths: + try: + rel_norm = os.path.relpath(fname, coder.root).replace("\\", "/") + except ValueError: + rel_norm = str(fname).replace("\\", "/") + if any(re.search(p, rel_norm) for p in exempt_paths): + io.tool_error( + f"Path '{confirm_fname}' matches an exempt-path pattern. " + "Skipping file creation." + ) + continue + if await io.confirm_ask( f"No files matched '{confirm_fname}'. Do you want to create this file?" 
): diff --git a/tests/basic/test_commands.py b/tests/basic/test_commands.py index 3955486ec87..c62f8ec26fc 100644 --- a/tests/basic/test_commands.py +++ b/tests/basic/test_commands.py @@ -231,6 +231,23 @@ async def test_cmd_add_no_match_but_make_it(self): self.assertEqual(len(coder.abs_fnames), 1) self.assertTrue(fname.exists()) + async def test_cmd_add_skips_create_on_exempt_path(self): + """Test that /add skips file creation for paths matching exempt-path regex patterns.""" + io = InputOutput(pretty=False, fancy_input=False, yes=True) + from types import SimpleNamespace + + from cecli.coders import Coder + + args = SimpleNamespace(exempt_paths=[r"\.[^/]+/attachments"]) + coder = await Coder.create(self.GPT35, None, io, args=args) + commands = Commands(io, coder) + + staging = Path(".cecli/attachments/missing.png") + commands.execute("add", str(staging)) + + self.assertEqual(len(coder.abs_fnames), 0) + self.assertFalse(staging.exists()) + async def test_cmd_add_drop_directory(self): # Initialize the Commands and InputOutput objects io = InputOutput(pretty=False, fancy_input=False, yes=False) From 4cd3c67caac0d2452ac099b53c26487386336f0b Mon Sep 17 00:00:00 2001 From: Your Name Date: Sun, 31 May 2026 23:32:19 -0400 Subject: [PATCH 35/37] Remove regex env var from exempt paths argument --- cecli/args.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cecli/args.py b/cecli/args.py index f095f594e06..862ef35bfaa 100644 --- a/cecli/args.py +++ b/cecli/args.py @@ -539,7 +539,6 @@ def get_parser(default_config_files, git_root): group.add_argument( "--exempt-paths", action="append", - metavar="REGEX", default=[], help=( "Specify a regex pattern for paths that should be exempted from file creation. 
" From e9deccabf43f38f6048b15b40a1c2db42532ac81 Mon Sep 17 00:00:00 2001 From: Your Name Date: Sun, 31 May 2026 23:36:56 -0400 Subject: [PATCH 36/37] Fix response name shadowing in mcp tool parsing --- cecli/coders/agent_coder.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cecli/coders/agent_coder.py b/cecli/coders/agent_coder.py index 8d77628ce25..d57cfad9a9d 100644 --- a/cecli/coders/agent_coder.py +++ b/cecli/coders/agent_coder.py @@ -839,7 +839,7 @@ async def gather_and_await(): async def _execute_mcp_tools(self, server, tool_calls): """Execute MCP tools via LiteLLM.""" - responses = [] + tool_responses = [] for tool_call in tool_calls: # Use existing _execute_mcp_tool logic result = await self._execute_mcp_tool( @@ -847,14 +847,14 @@ async def _execute_mcp_tools(self, server, tool_calls): tool_call.function.name, responses.parse_tool_arguments(tool_call.function.arguments), ) - responses.append( + tool_responses.append( { "role": "tool", "tool_call_id": tool_call.id, "content": result, } ) - return responses + return tool_responses def get_active_model(self): if self.main_model.agent_model: From e0c81f1e28500097b772ece3c34a56b16ffcdcd8 Mon Sep 17 00:00:00 2001 From: Your Name Date: Mon, 1 Jun 2026 00:36:58 -0400 Subject: [PATCH 37/37] Propagate quiet parameter to session read method --- cecli/sessions.py | 137 ++++++++++++++++++++++++---------------------- 1 file changed, 71 insertions(+), 66 deletions(-) diff --git a/cecli/sessions.py b/cecli/sessions.py index 7f9ad03203d..969bb816638 100644 --- a/cecli/sessions.py +++ b/cecli/sessions.py @@ -17,71 +17,6 @@ def __init__(self, coder, io): self.coder = coder self.io = io - def _get_session_directory(self) -> Path: - """Get the session directory, creating it if necessary.""" - session_dir = Path(self.coder.abs_root_path(".cecli/sessions")) - os.makedirs(session_dir, exist_ok=True) - return session_dir - - def _session_encrypt_settings(self) -> tuple[bool, bytes | None]: - args = 
getattr(self.coder, "args", None) - if not args or not getattr(args, "session_encrypt", False): - return False, None - key_file = getattr(args, "session_key_file", None) - return True, session_crypto.resolve_key(key_file=key_file) - - def _read_session_file(self, session_file: Path) -> dict | None: - try: - data = session_file.read_bytes() - except OSError as e: - self.io.tool_error(f"Error reading session: {e}") - return None - try: - if session_crypto.is_encrypted_payload(data): - args = getattr(self.coder, "args", None) - key_file = getattr(args, "session_key_file", None) if args else None - key = session_crypto.resolve_key(key_file=key_file) - if not key: - self.io.tool_error( - "Session is encrypted but no key is configured " - f"({session_crypto.KEY_ENV} or --session-key-file)." - ) - return None - return session_crypto.decrypt_session_bytes(data, key) - parsed = json.loads(data.decode("utf-8")) - if not isinstance(parsed, dict): - self.io.tool_error("Invalid session format.") - return None - return parsed - except session_crypto.SessionCryptoError as e: - self.io.tool_error(str(e)) - return None - except (UnicodeDecodeError, json.JSONDecodeError) as e: - self.io.tool_error(f"Error loading session: {e}") - return None - - def _write_session_file(self, session_file: Path, session_data: dict) -> bool: - encrypt_enabled, key = self._session_encrypt_settings() - try: - if encrypt_enabled: - if not key: - self.io.tool_error( - "Session encryption is enabled but no key is configured " - f"({session_crypto.KEY_ENV} or --session-key-file)." 
- ) - return False - session_file.write_bytes(session_crypto.encrypt_session_dict(session_data, key)) - else: - with open(session_file, "w", encoding="utf-8") as f: - json.dump(session_data, f, indent=2) - return True - except session_crypto.SessionCryptoError as e: - self.io.tool_error(str(e)) - return False - except OSError as e: - self.io.tool_error(f"Error saving session: {e}") - return False - def save_session(self, session_name: str, output=True) -> bool: """Save the current chat session to a named file.""" if not session_name: @@ -180,7 +115,7 @@ async def load_session(self, session_identifier: str, switch=True, quiet: bool = if not session_file: return False - session_data = self._read_session_file(session_file) + session_data = self._read_session_file(session_file, quiet=quiet) if session_data is None: return False @@ -207,6 +142,76 @@ async def load_session(self, session_identifier: str, switch=True, quiet: bool = ) return applied + def _get_session_directory(self) -> Path: + """Get the session directory, creating it if necessary.""" + session_dir = Path(self.coder.abs_root_path(".cecli/sessions")) + os.makedirs(session_dir, exist_ok=True) + return session_dir + + def _session_encrypt_settings(self) -> tuple[bool, bytes | None]: + args = getattr(self.coder, "args", None) + if not args or not getattr(args, "session_encrypt", False): + return False, None + key_file = getattr(args, "session_key_file", None) + return True, session_crypto.resolve_key(key_file=key_file) + + def _read_session_file(self, session_file: Path, quiet: bool = False) -> dict | None: + try: + data = session_file.read_bytes() + except OSError as e: + if not quiet: + self.io.tool_error(f"Error reading session: {e}") + return None + try: + if session_crypto.is_encrypted_payload(data): + args = getattr(self.coder, "args", None) + key_file = getattr(args, "session_key_file", None) if args else None + key = session_crypto.resolve_key(key_file=key_file) + if not key: + if not quiet: + 
self.io.tool_error( + "Session is encrypted but no key is configured " + f"({session_crypto.KEY_ENV} or --session-key-file)." + ) + return None + return session_crypto.decrypt_session_bytes(data, key) + parsed = json.loads(data.decode("utf-8")) + if not isinstance(parsed, dict): + if not quiet: + self.io.tool_error("Invalid session format.") + return None + return parsed + except session_crypto.SessionCryptoError as e: + if not quiet: + self.io.tool_error(str(e)) + return None + except (UnicodeDecodeError, json.JSONDecodeError) as e: + if not quiet: + self.io.tool_error(f"Error loading session: {e}") + return None + + def _write_session_file(self, session_file: Path, session_data: dict) -> bool: + encrypt_enabled, key = self._session_encrypt_settings() + try: + if encrypt_enabled: + if not key: + self.io.tool_error( + "Session encryption is enabled but no key is configured " + f"({session_crypto.KEY_ENV} or --session-key-file)." + ) + return False + session_file.write_bytes(session_crypto.encrypt_session_dict(session_data, key)) + else: + with open(session_file, "w", encoding="utf-8") as f: + json.dump(session_data, f, indent=2) + return True + except session_crypto.SessionCryptoError as e: + self.io.tool_error(str(e)) + return False + except OSError as e: + self.io.tool_error(f"Error saving session: {e}") + return False + def _build_session_data(self, session_name) -> Dict: """Build session data dictionary from current coder state.""" # Get relative paths for all files