From 7aa552e97df4120a6bd891e9d06335c35d5bd3f8 Mon Sep 17 00:00:00 2001
From: Armando Vaquera <263793884+proyectoauraorg@users.noreply.github.com>
Date: Sun, 24 May 2026 14:34:22 -0600
Subject: [PATCH 1/4] feat(openai): support codex models via the Responses API
 (#87)

Azure-hosted GPT-5.x codex models (e.g. gpt-5.3-codex) reject the Chat
Completions API and require the Responses API, so they could not be used
via the OpenAI-compatible provider, which routed everything through
chat.completions.create.

createMessage/completePrompt now detect codex models and route them through
the Responses API (responses.create) with streaming, Anthropic->Responses
input conversion, and flat tool-schema conversion. Non-codex models keep
using chat.completions unchanged. Ports RooCode PR #11952 (unmerged upstream).

Adds openai-codex-responses.spec.ts (13 tests). Existing openai tests
unchanged.
---
 .../__tests__/openai-codex-responses.spec.ts  | 465 +++++++++++++++++
 src/api/providers/openai.ts                   | 474 +++++++++++++++++-
 2 files changed, 937 insertions(+), 2 deletions(-)
 create mode 100644 src/api/providers/__tests__/openai-codex-responses.spec.ts

diff --git a/src/api/providers/__tests__/openai-codex-responses.spec.ts b/src/api/providers/__tests__/openai-codex-responses.spec.ts
new file mode 100644
index 000000000..6e38086f4
--- /dev/null
+++ b/src/api/providers/__tests__/openai-codex-responses.spec.ts
@@ -0,0 +1,465 @@
+// npx vitest run api/providers/__tests__/openai-codex-responses.spec.ts
+
+import { OpenAiHandler } from "../openai"
+import { ApiHandlerOptions } from "../../../shared/api"
+import { Anthropic } from "@anthropic-ai/sdk"
+import { openAiModelInfoSaneDefaults } from "@roo-code/types"
+
+const mockChatCreate = vitest.fn()
+const mockResponsesCreate = vitest.fn()
+
+vitest.mock("openai", () => { // swap the openai module for a stub exposing only the two create() endpoints
+	const mockConstructor = vitest.fn() // a single mock fn is exported as both `default` and `AzureOpenAI`
+	return {
+		__esModule: true,
+		default: mockConstructor.mockImplementation(() => ({
+			chat: {
+				completions: {
+					create: mockChatCreate,
+				},
+			},
+			responses: {
+				create: mockResponsesCreate,
+			},
+		})),
+		AzureOpenAI: mockConstructor.mockImplementation(() => ({ // same client shape as the default export
+			chat: {
+				completions: {
+					create: mockChatCreate,
+				},
+			},
+			responses: {
+				create: mockResponsesCreate,
+			},
+		})),
+	}
+})
+
+describe("OpenAiHandler - Codex model detection", () => {
+	let handler: OpenAiHandler
+
+	beforeEach(() => {
+		mockChatCreate.mockClear()
+		mockResponsesCreate.mockClear()
+	})
+
+	describe("_isCodexModel", () => {
+		it("should detect gpt-5.3-codex as a codex model", () => {
+			handler = new OpenAiHandler({
+				openAiApiKey: "test-key",
+				openAiModelId: "gpt-5.3-codex",
+				openAiBaseUrl: "https://test.openai.azure.com/openai/deployments/gpt5.3",
+				openAiUseAzure: true,
+			})
+			// _isCodexModel is protected, so it is reached through an `any` cast
+			expect((handler as any)._isCodexModel("gpt-5.3-codex")).toBe(true)
+		})
+
+		it("should detect gpt-5.1-codex as a codex model", () => {
+			handler = new OpenAiHandler({
+				openAiApiKey: "test-key",
+				openAiModelId: "gpt-5.1-codex",
+			})
+			expect((handler as any)._isCodexModel("gpt-5.1-codex")).toBe(true)
+		})
+
+		it("should detect codex in a case-insensitive manner", () => {
+			handler = new OpenAiHandler({
+				openAiApiKey: "test-key",
+				openAiModelId: "GPT-5.3-CODEX",
+			})
+			expect((handler as any)._isCodexModel("GPT-5.3-CODEX")).toBe(true)
+		})
+
+		it("should not detect regular models as codex", () => {
+			handler = new OpenAiHandler({
+				openAiApiKey: "test-key",
+				openAiModelId: "gpt-4",
+			})
+			expect((handler as any)._isCodexModel("gpt-4")).toBe(false)
+			expect((handler as any)._isCodexModel("gpt-4o")).toBe(false)
+			expect((handler as any)._isCodexModel("o3-mini")).toBe(false)
+		})
+	})
+
+	describe("createMessage with codex model", () => {
+		it("should use Responses API for codex models instead of Chat Completions", async () => {
+			handler = new OpenAiHandler({
+				openAiApiKey: "test-key",
+				openAiModelId: "gpt-5.3-codex",
+				openAiBaseUrl: "https://test.openai.azure.com/openai/deployments/gpt5.3",
+				openAiUseAzure: true,
+			})
+
+			// Mock responses.create to resolve to an async-iterable stream of events
+			mockResponsesCreate.mockResolvedValue({
+				[Symbol.asyncIterator]: async function* () {
+					yield {
+						type: "response.output_text.delta",
+						delta: "Hello from codex!",
+					}
+					yield {
+						type: "response.done",
+						response: {
+							usage: {
+								input_tokens: 10,
+								output_tokens: 5,
+							},
+						},
+					}
+				},
+			})
+
+			const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hello" }]
+
+			const chunks: any[] = []
+			for await (const chunk of handler.createMessage("You are a helpful assistant", messages, {
+				taskId: "test",
+			})) {
+				chunks.push(chunk)
+			}
+
+			// Verify responses.create was called, NOT chat.completions.create
+			expect(mockResponsesCreate).toHaveBeenCalledTimes(1)
+			expect(mockChatCreate).not.toHaveBeenCalled()
+
+			// Verify the request body structure
+			const requestBody = mockResponsesCreate.mock.calls[0][0]
+			expect(requestBody.model).toBe("gpt-5.3-codex")
+			expect(requestBody.stream).toBe(true)
+			expect(requestBody.instructions).toBe("You are a helpful assistant")
+			expect(requestBody.input).toBeDefined()
+			expect(Array.isArray(requestBody.input)).toBe(true)
+
+			// Verify chunks
+			const textChunks = chunks.filter((c) => c.type === "text")
+			expect(textChunks.length).toBe(1)
+			expect(textChunks[0].text).toBe("Hello from codex!")
+
+			const usageChunks = chunks.filter((c) => c.type === "usage")
+			expect(usageChunks.length).toBe(1)
+			expect(usageChunks[0].inputTokens).toBe(10)
+			expect(usageChunks[0].outputTokens).toBe(5)
+		})
+
+		it("should use Chat Completions for non-codex models", async () => {
+			handler = new OpenAiHandler({
+				openAiApiKey: "test-key",
+				openAiModelId: "gpt-4",
+				openAiBaseUrl: "https://api.openai.com/v1",
+			})
+
+			mockChatCreate.mockResolvedValue({
+				[Symbol.asyncIterator]: async function* () {
+					yield {
+						choices: [{ delta: { content: "Hello" }, index: 0 }],
+						usage: null,
+					}
+					yield {
+						choices: [{ delta: {}, index: 0 }],
+						usage: { prompt_tokens: 10, completion_tokens: 5, total_tokens: 15 },
+					}
+				},
+			})
+
+			const messages: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hello" }]
+
+			const chunks: any[] = []
+			for await (const chunk of handler.createMessage("System", messages, { taskId: "test" })) {
+				chunks.push(chunk)
+			}
+
+			// Verify chat.completions.create was called, NOT responses.create
+			expect(mockChatCreate).toHaveBeenCalledTimes(1)
+			expect(mockResponsesCreate).not.toHaveBeenCalled()
+		})
+	})
+
+	describe("createMessage codex conversation formatting", () => {
+		it("should format conversation with tool use correctly for Responses API", async () => {
+			handler = new OpenAiHandler({
+				openAiApiKey: "test-key",
+				openAiModelId: "gpt-5.3-codex",
+				openAiBaseUrl: "https://test.openai.azure.com/openai/deployments/gpt5.3",
+				openAiUseAzure: true,
+			})
+
+			mockResponsesCreate.mockResolvedValue({
+				[Symbol.asyncIterator]: async function* () {
+					yield {
+						type: "response.output_text.delta",
+						delta: "Done.",
+					}
+					yield {
+						type: "response.done",
+						response: {
+							usage: { input_tokens: 20, output_tokens: 3 },
+						},
+					}
+				},
+			})
+
+			const messages: Anthropic.Messages.MessageParam[] = [
+				{ role: "user", content: "What is 1+1?" },
+				{
+					role: "assistant",
+					content: [
+						{ type: "text", text: "Let me calculate that." },
+						{ type: "tool_use", id: "call_123", name: "calculator", input: { expression: "1+1" } },
+					],
+				},
+				{
+					role: "user",
+					content: [
+						{
+							type: "tool_result",
+							tool_use_id: "call_123",
+							content: "2",
+						},
+					],
+				},
+			]
+
+			for await (const _chunk of handler.createMessage("You are helpful", messages, { taskId: "test" })) {
+				// drain the stream; only the captured request body is asserted below
+			}
+
+			const requestBody = mockResponsesCreate.mock.calls[0][0]
+			const input = requestBody.input
+
+			// First item: user message
+			expect(input[0].role).toBe("user")
+			expect(input[0].content[0].type).toBe("input_text")
+			expect(input[0].content[0].text).toBe("What is 1+1?")
+
+			// Second item: assistant text
+			expect(input[1].role).toBe("assistant")
+			expect(input[1].content[0].type).toBe("output_text")
+
+			// Third item: function_call
+			expect(input[2].type).toBe("function_call")
+			expect(input[2].name).toBe("calculator")
+
+			// Fourth item: function_call_output
+			expect(input[3].type).toBe("function_call_output")
+			expect(input[3].output).toBe("2")
+		})
+	})
+
+	describe("createMessage codex tool call streaming", () => {
+		it("should handle tool call events from the Responses API", async () => {
+			handler = new OpenAiHandler({
+				openAiApiKey: "test-key",
+				openAiModelId: "gpt-5.3-codex",
+				openAiUseAzure: true,
+			})
+
+			mockResponsesCreate.mockResolvedValue({
+				[Symbol.asyncIterator]: async function* () {
+					yield {
+						type: "response.output_item.added",
+						item: {
+							type: "function_call",
+							call_id: "call_abc",
+							name: "read_file",
+						},
+					}
+					yield {
+						type: "response.function_call_arguments.delta",
+						call_id: "call_abc",
+						name: "read_file",
+						delta: '{"path":',
+						index: 0,
+					}
+					yield {
+						type: "response.function_call_arguments.delta",
+						call_id: "call_abc",
+						name: "read_file",
+						delta: '"test.ts"}',
+						index: 0,
+					}
+					yield {
+						type: "response.function_call_arguments.done",
+						call_id: "call_abc",
+					}
+					yield {
+						type: "response.done",
+						response: {
+							usage: { input_tokens: 5, output_tokens: 10 },
+						},
+					}
+				},
+			})
+
+			const chunks: any[] = []
+			for await (const chunk of handler.createMessage("System", [{ role: "user", content: "Read test.ts" }], {
+				taskId: "test",
+			})) {
+				chunks.push(chunk)
+			}
+
+			const partialCalls = chunks.filter((c) => c.type === "tool_call_partial")
+			expect(partialCalls.length).toBe(2)
+			expect(partialCalls[0].id).toBe("call_abc")
+			expect(partialCalls[0].name).toBe("read_file")
+			expect(partialCalls[0].arguments).toBe('{"path":')
+			expect(partialCalls[1].arguments).toBe('"test.ts"}')
+		})
+
+		it("should handle complete tool calls from output_item.done", async () => {
+			handler = new OpenAiHandler({
+				openAiApiKey: "test-key",
+				openAiModelId: "gpt-5.3-codex",
+				openAiUseAzure: true,
+			})
+
+			mockResponsesCreate.mockResolvedValue({
+				[Symbol.asyncIterator]: async function* () {
+					yield {
+						type: "response.output_item.added",
+						item: {
+							type: "function_call",
+							call_id: "call_xyz",
+							name: "write_file",
+						},
+					}
+					yield {
+						type: "response.output_item.done",
+						item: {
+							type: "function_call",
+							call_id: "call_xyz",
+							name: "write_file",
+							arguments: '{"path":"out.txt","content":"hello"}',
+						},
+					}
+					yield {
+						type: "response.done",
+						response: {
+							usage: { input_tokens: 5, output_tokens: 10 },
+						},
+					}
+				},
+			})
+
+			const chunks: any[] = []
+			for await (const chunk of handler.createMessage("System", [{ role: "user", content: "Write file" }], {
+				taskId: "test",
+			})) {
+				chunks.push(chunk)
+			}
+
+			const toolCalls = chunks.filter((c) => c.type === "tool_call")
+			expect(toolCalls.length).toBe(1)
+			expect(toolCalls[0].id).toBe("call_xyz")
+			expect(toolCalls[0].name).toBe("write_file")
+			expect(toolCalls[0].arguments).toBe('{"path":"out.txt","content":"hello"}')
+		})
+	})
+
+	describe("completePrompt with codex model", () => {
+		it("should use Responses API for codex models in completePrompt", async () => {
+			handler = new OpenAiHandler({
+				openAiApiKey: "test-key",
+				openAiModelId: "gpt-5.3-codex",
+				openAiBaseUrl: "https://test.openai.azure.com/openai/deployments/gpt5.3",
+				openAiUseAzure: true,
+			})
+
+			mockResponsesCreate.mockResolvedValue({
+				output: [
+					{
+						type: "message",
+						content: [
+							{
+								type: "output_text",
+								text: "Completed prompt response",
+							},
+						],
+					},
+				],
+			})
+
+			const result = await handler.completePrompt("Complete this")
+
+			expect(mockResponsesCreate).toHaveBeenCalledTimes(1)
+			expect(mockChatCreate).not.toHaveBeenCalled()
+
+			const requestBody = mockResponsesCreate.mock.calls[0][0]
+			expect(requestBody.model).toBe("gpt-5.3-codex")
+			expect(requestBody.stream).toBe(false)
+			expect(requestBody.input[0].role).toBe("user")
+
+			expect(result).toBe("Completed prompt response")
+		})
+
+		it("should use Chat Completions for non-codex models in completePrompt", async () => {
+			handler = new OpenAiHandler({
+				openAiApiKey: "test-key",
+				openAiModelId: "gpt-4",
+				openAiBaseUrl: "https://api.openai.com/v1",
+			})
+
+			mockChatCreate.mockResolvedValue({
+				choices: [
+					{
+						message: { role: "assistant", content: "Chat completion response" },
+						finish_reason: "stop",
+						index: 0,
+					},
+				],
+				usage: { prompt_tokens: 5, completion_tokens: 3, total_tokens: 8 },
+			})
+
+			const result = await handler.completePrompt("Complete this")
+
+			expect(mockChatCreate).toHaveBeenCalledTimes(1)
+			expect(mockResponsesCreate).not.toHaveBeenCalled()
+			expect(result).toBe("Chat completion response")
+		})
+	})
+
+	describe("createMessage codex error handling", () => {
+		it("should handle API errors from Responses API", async () => {
+			handler = new OpenAiHandler({
+				openAiApiKey: "test-key",
+				openAiModelId: "gpt-5.3-codex",
+				openAiUseAzure: true,
+			})
+
+			mockResponsesCreate.mockRejectedValue(new Error("API rate limit exceeded"))
+
+			await expect(async () => {
+				for await (const _chunk of handler.createMessage("System", [{ role: "user", content: "Hello" }], {
+					taskId: "test",
+				})) {
+					// drain the stream so the rejected create() call surfaces
+				}
+			}).rejects.toThrow()
+		})
+
+		it("should handle error events in the stream", async () => {
+			handler = new OpenAiHandler({
+				openAiApiKey: "test-key",
+				openAiModelId: "gpt-5.3-codex",
+				openAiUseAzure: true,
+			})
+
+			mockResponsesCreate.mockResolvedValue({
+				[Symbol.asyncIterator]: async function* () {
+					yield {
+						type: "response.error",
+						error: { message: "Something went wrong" },
+					}
+				},
+			})
+
+			await expect(async () => {
+				for await (const _chunk of handler.createMessage("System", [{ role: "user", content: "Hello" }], {
+					taskId: "test",
+				})) {
+					// drain the stream so the in-stream error event is reached
+				}
+			}).rejects.toThrow("Responses API error: Something went wrong")
+		})
+	})
+})
diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts
index 7ea33196f..c362204ca 100644
--- a/src/api/providers/openai.ts
+++ b/src/api/providers/openai.ts
@@ -13,6 +13,8 @@ import {
 import type { ApiHandlerOptions } from "../../shared/api"
 
 import { TagMatcher } from "../../utils/tag-matcher"
+import { sanitizeOpenAiCallId } from "../../utils/tool-id"
+import { isMcpTool } from "../../utils/mcp-name"
 
 import { convertToOpenAiMessages } from "../transform/openai-format"
 import { convertToR1Format } from "../transform/r1-format"
@@ -91,6 +93,11 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 		const isAzureAiInference = this._isAzureAiInference(modelUrl)
 		const deepseekReasoner = modelId.includes("deepseek-reasoner") || enabledR1Format
 
+		if (this._isCodexModel(modelId)) {
+			yield* this.handleCodexMessage(systemPrompt, messages, metadata)
+			return
+		}
+
 		if (modelId.includes("o1") || modelId.includes("o3") || modelId.includes("o4")) {
 			yield* this.handleO3FamilyMessage(modelId, systemPrompt, messages, metadata)
 			return
@@ -300,12 +307,19 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 
 	async completePrompt(prompt: string): Promise<string> {
 		try {
-			const isAzureAiInference = this._isAzureAiInference(this.options.openAiBaseUrl)
 			const model = this.getModel()
+			const modelId = model.id
 			const modelInfo = model.info
 
+			// Codex models must use the Responses API
+			if (this._isCodexModel(modelId)) {
+				return this._completePromptWithResponsesApi(prompt, model)
+			}
+
+			const isAzureAiInference = this._isAzureAiInference(this.options.openAiBaseUrl)
+
 			const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming = {
-				model: model.id,
+				model: modelId,
 				messages: [{ role: "user", content: prompt }],
 			}
 
@@ -332,6 +346,58 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 		}
 	}
 
+	/**
+	 * Completes a prompt via a non-streaming Responses API call and returns all generated text ("" if none).
+	 */
+	private async _completePromptWithResponsesApi(
+		prompt: string,
+		model: ReturnType<OpenAiHandler["getModel"]>,
+	): Promise<string> {
+		const requestBody: any = {
+			model: model.id,
+			input: [
+				{
+					role: "user",
+					content: [{ type: "input_text", text: prompt }],
+				},
+			],
+			stream: false,
+			store: false,
+		}
+
+		// Add max_output_tokens if needed
+		if (this.options.includeMaxTokens === true) {
+			requestBody.max_output_tokens = this.options.modelMaxTokens || model.info.maxTokens
+		}
+
+		let response
+		try {
+			response = await (this.client as any).responses.create(requestBody)
+		} catch (error) {
+			throw handleOpenAIError(error, this.providerName)
+		}
+
+		// Extract text from the Responses API response, concatenating every
+		// output_text part so multi-part messages are not truncated.
+		const textParts: string[] = []
+		if (Array.isArray(response?.output)) {
+			for (const outputItem of response.output) {
+				if (outputItem?.type !== "message" || !Array.isArray(outputItem.content)) {
+					continue
+				}
+				for (const content of outputItem.content) {
+					if (content?.type === "output_text" && typeof content.text === "string") {
+						textParts.push(content.text)
+					}
+				}
+			}
+		}
+
+		// Fallback: the SDK's aggregated `output_text`. (`response.text` is the
+		// text-format configuration object, never the generated output.)
+		return textParts.join("") || (typeof response?.output_text === "string" ? response.output_text : "")
+	}
+
 	private async *handleO3FamilyMessage(
 		modelId: string,
 		systemPrompt: string,
@@ -502,6 +568,410 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 		}
 	}
 
+	/**
+	 * Returns true when `modelId` contains "codex" (case-insensitive substring
+	 * match, e.g. gpt-5.3-codex). createMessage/completePrompt use this to route
+	 * such models to the Responses API instead of the Chat Completions API.
+	 */
+	protected _isCodexModel(modelId: string): boolean {
+		return modelId.toLowerCase().includes("codex")
+	}
+
+	/**
+	 * Streams a codex-model reply through the Responses API (responses.create),
+	 * yielding text, reasoning, tool_call / tool_call_partial and usage chunks.
+	 * Error and failed events are thrown after passing through handleOpenAIError.
+	 */
+	private async *handleCodexMessage(
+		systemPrompt: string,
+		messages: Anthropic.Messages.MessageParam[],
+		metadata?: ApiHandlerCreateMessageMetadata,
+	): ApiStream {
+		const model = this.getModel()
+
+		// Format conversation for the Responses API
+		const formattedInput = this._formatConversationForResponsesApi(messages)
+
+		// Build tools in Responses API format (flat structure, not nested under function)
+		const tools = this._convertToolsForResponsesApi(metadata?.tools)
+
+		// Build the request body
+		const requestBody: any = {
+			model: model.id,
+			input: formattedInput,
+			stream: true,
+			store: false,
+			instructions: systemPrompt,
+			...(tools && tools.length > 0 ? { tools } : {}),
+			...(metadata?.tool_choice ? { tool_choice: metadata.tool_choice } : {}),
+			parallel_tool_calls: metadata?.parallelToolCalls ?? true,
+		}
+
+		// Add temperature
+		if (model.info.supportsTemperature !== false) {
+			requestBody.temperature = this.options.modelTemperature ?? 0
+		}
+
+		// Add max_output_tokens if needed
+		if (this.options.includeMaxTokens === true) {
+			requestBody.max_output_tokens = this.options.modelMaxTokens || model.info.maxTokens
+		}
+
+		// State tracking for streaming
+		let pendingToolCallId: string | undefined
+		let pendingToolCallName: string | undefined
+		let sawTextOutput = false
+		const streamedToolCallIds = new Set<string>()
+
+		try {
+			const stream = (await (this.client as any).responses.create(requestBody)) as AsyncIterable<any>
+
+			for await (const event of stream) {
+				// Handle text deltas
+				if (event?.type === "response.text.delta" || event?.type === "response.output_text.delta") {
+					if (event?.delta) {
+						sawTextOutput = true
+						yield { type: "text", text: event.delta }
+					}
+					continue
+				}
+
+				// Handle done-only text for variants that skip delta events
+				if (event?.type === "response.text.done" || event?.type === "response.output_text.done") {
+					const doneText =
+						typeof event?.text === "string"
+							? event.text
+							: typeof event?.output_text === "string"
+								? event.output_text
+								: undefined
+					if (!sawTextOutput && doneText) {
+						sawTextOutput = true
+						yield { type: "text", text: doneText }
+					}
+					continue
+				}
+
+				// Handle content part events
+				if (event?.type === "response.content_part.added" || event?.type === "response.content_part.done") {
+					const part = event?.part
+					if (
+						!sawTextOutput &&
+						(part?.type === "text" || part?.type === "output_text") &&
+						typeof part?.text === "string" &&
+						part.text
+					) {
+						sawTextOutput = true
+						yield { type: "text", text: part.text }
+					}
+					continue
+				}
+
+				// Handle reasoning deltas
+				if (
+					event?.type === "response.reasoning.delta" ||
+					event?.type === "response.reasoning_text.delta" ||
+					event?.type === "response.reasoning_summary.delta" ||
+					event?.type === "response.reasoning_summary_text.delta"
+				) {
+					if (event?.delta) {
+						yield { type: "reasoning", text: event.delta }
+					}
+					continue
+				}
+
+				// Handle refusal deltas
+				if (event?.type === "response.refusal.delta") {
+					if (event?.delta) {
+						sawTextOutput = true
+						yield { type: "text", text: `[Refusal] ${event.delta}` }
+					}
+					continue
+				}
+
+				// Handle output item events (track tool identity)
+				if (event?.type === "response.output_item.added" || event?.type === "response.output_item.done") {
+					const item = event?.item
+					if (item) {
+						// Capture tool identity for subsequent argument deltas
+						if (item.type === "function_call" || item.type === "tool_call") {
+							const callId = item.call_id || item.tool_call_id || item.id
+							const name = item.name || item.function?.name
+							if (typeof callId === "string" && callId.length > 0) {
+								pendingToolCallId = callId
+								pendingToolCallName = typeof name === "string" ? name : undefined
+							}
+						}
+
+						if (event.type === "response.output_item.added") {
+							if ((item.type === "text" || item.type === "output_text") && item.text) {
+								sawTextOutput = true
+								yield { type: "text", text: item.text }
+							} else if (item.type === "message" && Array.isArray(item.content)) {
+								for (const content of item.content) {
+									if (
+										(content?.type === "text" || content?.type === "output_text") &&
+										content?.text
+									) {
+										sawTextOutput = true
+										yield { type: "text", text: content.text }
+									}
+								}
+							}
+						} else if (
+							event.type === "response.output_item.done" &&
+							(item.type === "function_call" || item.type === "tool_call")
+						) {
+							const callId = item.call_id || item.tool_call_id || item.id
+							const name = item.name || item.function?.name
+							const argsRaw = item.arguments || item.function?.arguments || item.input
+							const args =
+								typeof argsRaw === "string"
+									? argsRaw
+									: argsRaw && typeof argsRaw === "object"
+										? JSON.stringify(argsRaw)
+										: ""
+
+							if (
+								typeof callId === "string" &&
+								callId.length > 0 &&
+								typeof name === "string" &&
+								name.length > 0 &&
+								!streamedToolCallIds.has(callId)
+							) {
+								yield { type: "tool_call", id: callId, name, arguments: args }
+							}
+						} else if (!sawTextOutput) {
+							if ((item.type === "text" || item.type === "output_text") && item.text) {
+								sawTextOutput = true
+								yield { type: "text", text: item.text }
+							} else if (item.type === "message" && Array.isArray(item.content)) {
+								for (const content of item.content) {
+									if (
+										(content?.type === "text" || content?.type === "output_text") &&
+										content?.text
+									) {
+										sawTextOutput = true
+										yield { type: "text", text: content.text }
+									}
+								}
+							}
+						}
+					}
+					continue
+				}
+
+				// Handle tool/function call argument deltas
+				if (
+					event?.type === "response.tool_call_arguments.delta" ||
+					event?.type === "response.function_call_arguments.delta"
+				) {
+					const callId = event.call_id || event.tool_call_id || event.id || pendingToolCallId || undefined
+					const name = event.name || event.function_name || pendingToolCallName || undefined
+					const args = event.delta || event.arguments
+
+					if (
+						typeof name === "string" &&
+						name.length > 0 &&
+						typeof callId === "string" &&
+						callId.length > 0
+					) {
+						streamedToolCallIds.add(callId)
+						yield {
+							type: "tool_call_partial",
+							index: event.index ?? 0,
+							id: callId,
+							name,
+							arguments: args,
+						}
+					}
+					continue
+				}
+
+				// Handle tool/function call completion
+				if (
+					event?.type === "response.tool_call_arguments.done" ||
+					event?.type === "response.function_call_arguments.done"
+				) {
+					continue
+				}
+
+				// Handle completion events with usage
+				if (event?.type === "response.done" || event?.type === "response.completed") {
+					// Fallback text extraction from final payload
+					if (!sawTextOutput && Array.isArray(event?.response?.output)) {
+						for (const outputItem of event.response.output) {
+							if (
+								(outputItem?.type === "text" || outputItem?.type === "output_text") &&
+								outputItem?.text
+							) {
+								sawTextOutput = true
+								yield { type: "text", text: outputItem.text }
+								continue
+							}
+							if (outputItem?.type === "message" && Array.isArray(outputItem.content)) {
+								for (const content of outputItem.content) {
+									if (
+										(content?.type === "text" || content?.type === "output_text") &&
+										content?.text
+									) {
+										sawTextOutput = true
+										yield { type: "text", text: content.text }
+									}
+								}
+							}
+						}
+					}
+
+					// Extract usage
+					const usage = event?.response?.usage || event?.usage
+					if (usage) {
+						yield {
+							type: "usage",
+							inputTokens: usage.input_tokens ?? usage.prompt_tokens ?? 0,
+							outputTokens: usage.output_tokens ?? usage.completion_tokens ?? 0,
+							cacheWriteTokens: usage.cache_creation_input_tokens || undefined,
+							cacheReadTokens: usage.cache_read_input_tokens || undefined,
+						}
+					}
+					continue
+				}
+
+				// Handle error events
+				if (event?.type === "response.error" || event?.type === "error") {
+					if (event.error || event.message) {
+						throw new Error(
+							`Responses API error: ${event.error?.message || event.message || "Unknown error"}`,
+						)
+					}
+				}
+
+				// Handle failed event. The API nests the failure under
+				// event.response.error; always surface it instead of ending silently.
+				if (event?.type === "response.failed") {
+					const failure = event.response?.error ?? event.error
+					throw new Error(
+						`Response failed: ${failure?.message || event.message || "Unknown failure"}`,
+					)
+				}
+
+				// Fallback for older formats
+				if (event?.choices?.[0]?.delta?.content) {
+					yield { type: "text", text: event.choices[0].delta.content }
+				}
+
+				if (event?.usage) {
+					yield {
+						type: "usage",
+						inputTokens: event.usage.input_tokens ?? event.usage.prompt_tokens ?? 0,
+						outputTokens: event.usage.output_tokens ?? event.usage.completion_tokens ?? 0,
+					}
+				}
+			}
+		} catch (error) {
+			throw handleOpenAIError(error, this.providerName)
+		}
+	}
+
+	/**
+	 * Converts Anthropic messages into Responses API input items (role messages, function_call, function_call_output).
+	 */
+	private _formatConversationForResponsesApi(messages: Anthropic.Messages.MessageParam[]): any[] {
+		const formattedInput: any[] = []
+
+		for (const message of messages) {
+			if (message.role === "user") {
+				const content: any[] = []
+				const toolResults: any[] = []
+
+				if (typeof message.content === "string") {
+					content.push({ type: "input_text", text: message.content })
+				} else if (Array.isArray(message.content)) {
+					for (const block of message.content) {
+						if (block.type === "text") {
+							content.push({ type: "input_text", text: block.text })
+						} else if (block.type === "image") { // NOTE(review): assumes a base64 image source — TODO confirm
+							const image = block as Anthropic.Messages.ImageBlockParam
+							const imageUrl = `data:${image.source.media_type};base64,${image.source.data}`
+							content.push({ type: "input_image", image_url: imageUrl })
+						} else if (block.type === "tool_result") { // text parts are joined; other part types become ""
+							const result =
+								typeof block.content === "string"
+									? block.content
+									: block.content?.map((c: any) => (c.type === "text" ? c.text : "")).join("") || ""
+							toolResults.push({
+								type: "function_call_output",
+								call_id: sanitizeOpenAiCallId(block.tool_use_id),
+								output: result,
+							})
+						}
+					}
+				}
+
+				if (content.length > 0) { // the user's text/images are emitted before this message's tool outputs
+					formattedInput.push({ role: "user", content })
+				}
+				if (toolResults.length > 0) {
+					formattedInput.push(...toolResults)
+				}
+			} else if (message.role === "assistant") {
+				const content: any[] = []
+				const toolCalls: any[] = []
+
+				if (typeof message.content === "string") {
+					content.push({ type: "output_text", text: message.content })
+				} else if (Array.isArray(message.content)) {
+					for (const block of message.content) {
+						if (block.type === "text") {
+							content.push({ type: "output_text", text: block.text })
+						} else if (block.type === "tool_use") {
+							toolCalls.push({
+								type: "function_call",
+								call_id: sanitizeOpenAiCallId(block.id),
+								name: block.name,
+								arguments: JSON.stringify(block.input),
+							})
+						}
+					}
+				}
+
+				if (content.length > 0) { // assistant text is emitted before this message's function_call items
+					formattedInput.push({ role: "assistant", content })
+				}
+				if (toolCalls.length > 0) {
+					formattedInput.push(...toolCalls)
+				}
+			}
+		}
+
+		return formattedInput
+	}
+
+	/**
+	 * Converts Chat Completions tools ({type, function: {name, description, parameters}})
+	 * into the flat Responses API shape ({type, name, description, parameters, strict}).
+	 * Returns undefined for missing/empty input; entries whose type is not "function" are dropped.
+	 */
+	private _convertToolsForResponsesApi(tools: any[] | undefined): any[] | undefined {
+		if (!tools || tools.length === 0) {
+			return undefined
+		}
+
+		return tools
+			.filter((tool: any) => tool.type === "function")
+			.map((tool: any) => {
+				const isMcp = isMcpTool(tool.function.name)
+				return {
+					type: "function",
+					name: tool.function.name,
+					description: tool.function.description,
+					parameters: isMcp // MCP tool schemas are passed through without conversion
+						? tool.function.parameters
+						: this.convertToolSchemaForOpenAI(tool.function.parameters),
+					strict: !isMcp, // strict mode is turned off for MCP tools
+				}
+			})
+	}
+
 	protected _getUrlHost(baseUrl?: string): string {
 		try {
 			return new URL(baseUrl ?? "").host

From f9cb644660e43cf00e8b7d93974fd1ccacbe36da Mon Sep 17 00:00:00 2001
From: Armando Vaquera <263793884+proyectoauraorg@users.noreply.github.com>
Date: Sun, 24 May 2026 16:07:05 -0600
Subject: [PATCH 2/4] test(openai): cover codex Responses-API tool-schema
 conversion (#87)

---
 .../__tests__/openai-codex-responses.spec.ts  | 54 +++++++++++++++++++
 1 file changed, 54 insertions(+)

diff --git a/src/api/providers/__tests__/openai-codex-responses.spec.ts b/src/api/providers/__tests__/openai-codex-responses.spec.ts
index 6e38086f4..10d2f5594 100644
--- a/src/api/providers/__tests__/openai-codex-responses.spec.ts
+++ b/src/api/providers/__tests__/openai-codex-responses.spec.ts
@@ -244,6 +244,60 @@ describe("OpenAiHandler - Codex model detection", () => {
 			expect(input[3].type).toBe("function_call_output")
 			expect(input[3].output).toBe("2")
 		})
+
+		it("converts tools to the flat Responses API schema (not nested under `function`)", async () => {
+			// Tool definition in the nested Chat Completions shape, passed to createMessage via metadata.
+			const calculatorTool = {
+				type: "function",
+				function: {
+					name: "calculator",
+					description: "Evaluate a math expression",
+					parameters: {
+						type: "object",
+						properties: { expression: { type: "string" } },
+						required: ["expression"],
+					},
+				},
+			}
+			const history: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hi" }]
+
+			// Minimal stream: a single `response.done` event that only carries usage.
+			async function* doneOnlyStream() {
+				yield { type: "response.done", response: { usage: { input_tokens: 1, output_tokens: 1 } } }
+			}
+			mockResponsesCreate.mockResolvedValue({ [Symbol.asyncIterator]: doneOnlyStream })
+
+			handler = new OpenAiHandler({
+				openAiApiKey: "test-key",
+				openAiModelId: "gpt-5.3-codex",
+				openAiBaseUrl: "https://test.openai.azure.com/openai/deployments/gpt5.3",
+				openAiUseAzure: true,
+			})
+
+			const stream = handler.createMessage("You are helpful", history, {
+				taskId: "test",
+				tools: [calculatorTool],
+			} as any)
+			for await (const _chunk of stream) {
+				// Consume the stream so responses.create is invoked.
+			}
+
+			const sentTools = mockResponsesCreate.mock.calls[0][0].tools
+			expect(Array.isArray(sentTools)).toBe(true)
+			expect(sentTools).toHaveLength(1)
+
+			// Flat structure: fields live at the top level, not under `function`.
+			const [flat] = sentTools
+			expect(flat.type).toBe("function")
+			expect(flat.name).toBe("calculator")
+			expect(flat.description).toBe("Evaluate a math expression")
+			expect(flat.function).toBeUndefined()
+			expect(flat.parameters).toBeDefined()
+			expect(flat.parameters.properties).toBeDefined()
+
+			// Non-MCP tools are sent with strict schema validation enabled.
+			expect(flat.strict).toBe(true)
+		})
 	})
 
 	describe("createMessage codex tool call streaming", () => {

From b2ea01f40d761e311972ae64ffca4f76cd2d4036 Mon Sep 17 00:00:00 2001
From: Armando Vaquera <263793884+proyectoauraorg@users.noreply.github.com>
Date: Sun, 24 May 2026 16:09:36 -0600
Subject: [PATCH 3/4] fix(openai): always fail on codex Responses-API error
 events (#87)

Error/failed events without error/message fields were silently ignored,
risking continued processing of a broken stream. Throw unconditionally on
response.error/error/response.failed (with an Unknown error/failure fallback).
Adds a test for the no-details case.
---
 .../__tests__/openai-codex-responses.spec.ts  | 22 +++++++++++++++++++
 src/api/providers/openai.ts                   | 16 +++++---------
 2 files changed, 27 insertions(+), 11 deletions(-)

diff --git a/src/api/providers/__tests__/openai-codex-responses.spec.ts b/src/api/providers/__tests__/openai-codex-responses.spec.ts
index 10d2f5594..72d878b51 100644
--- a/src/api/providers/__tests__/openai-codex-responses.spec.ts
+++ b/src/api/providers/__tests__/openai-codex-responses.spec.ts
@@ -515,5 +515,27 @@ describe("OpenAiHandler - Codex model detection", () => {
 				}
 			}).rejects.toThrow("Responses API error: Something went wrong")
 		})
+
+		it("throws on an error event even when it carries no error/message details", async () => {
+			handler = new OpenAiHandler({
+				openAiModelId: "gpt-5.3-codex",
+				openAiApiKey: "test-key",
+				openAiUseAzure: true,
+			})
+
+			// The failure event carries neither an `error` object nor a `message`.
+			async function* bareFailureStream() {
+				yield { type: "response.failed" }
+			}
+			mockResponsesCreate.mockResolvedValue({ [Symbol.asyncIterator]: bareFailureStream })
+
+			const consumeAll = async () => {
+				const history: Anthropic.Messages.MessageParam[] = [{ role: "user", content: "Hello" }]
+				for await (const _chunk of handler.createMessage("System", history, { taskId: "test" })) {
+					// consume
+				}
+			}
+			await expect(consumeAll).rejects.toThrow("Response failed: Unknown failure")
+		})
 	})
 })
diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts
index c362204ca..eb97f3a5c 100644
--- a/src/api/providers/openai.ts
+++ b/src/api/providers/openai.ts
@@ -836,22 +836,16 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 					continue
 				}
 
-				// Handle error events
+				// Handle error events. Always fail on an error/failed event type, even
+				// when it carries no `error`/`message`, so we never keep consuming a
+				// stream the API has already reported as broken.
 				if (event?.type === "response.error" || event?.type === "error") {
-					if (event.error || event.message) {
-						throw new Error(
-							`Responses API error: ${event.error?.message || event.message || "Unknown error"}`,
-						)
-					}
+					throw new Error(`Responses API error: ${event.error?.message || event.message || "Unknown error"}`)
 				}
 
 				// Handle failed event
 				if (event?.type === "response.failed") {
-					if (event.error || event.message) {
-						throw new Error(
-							`Response failed: ${event.error?.message || event.message || "Unknown failure"}`,
-						)
-					}
+					throw new Error(`Response failed: ${event.error?.message || event.message || "Unknown failure"}`)
 				}
 
 				// Fallback for older formats

From 2402b0f326beec4504e3926c8190824137b43a73 Mon Sep 17 00:00:00 2001
From: Armando Vaquera <263793884+proyectoauraorg@users.noreply.github.com>
Date: Tue, 26 May 2026 00:37:35 -0600
Subject: [PATCH 4/4] fix(openai): address CodeRabbit review comments on Codex
 handler

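- Add a non-streaming Responses API path, used when openAiStreamingEnabled is false
- Add defensive checks for empty/missing tool call fields (call id and name) on that path
- Collect all output_text parts of a Responses API response instead of returning only the first
- Honor metadata.store when provided (still defaults to false)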
---
 src/api/providers/openai.ts | 77 ++++++++++++++++++++++++++++++++++---
 1 file changed, 72 insertions(+), 5 deletions(-)

diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts
index eb97f3a5c..b135d5f3e 100644
--- a/src/api/providers/openai.ts
+++ b/src/api/providers/openai.ts
@@ -377,17 +377,21 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 			throw handleOpenAIError(error, this.providerName)
 		}
 
-		// Extract text from the Responses API response
+		// Extract text from the Responses API response — collect ALL output_text parts
 		if (response?.output && Array.isArray(response.output)) {
+			const textParts: string[] = []
 			for (const outputItem of response.output) {
 				if (outputItem.type === "message" && outputItem.content) {
 					for (const content of outputItem.content) {
 						if (content.type === "output_text" && content.text) {
-							return content.text
+							textParts.push(content.text)
 						}
 					}
 				}
 			}
+			if (textParts.length > 0) {
+				return textParts.join("")
+			}
 		}
 
 		// Fallback: check for direct text in response
@@ -595,12 +599,13 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 		// Build tools in Responses API format (flat structure, not nested under function)
 		const tools = this._convertToolsForResponsesApi(metadata?.tools)
 
-		// Build the request body
+		const useStreaming = this.options.openAiStreamingEnabled ?? true
+
+		// Build the request body (stream flag added per path below)
 		const requestBody: any = {
 			model: model.id,
 			input: formattedInput,
-			stream: true,
-			store: false,
+			store: metadata?.store ?? false,
 			instructions: systemPrompt,
 			...(tools && tools.length > 0 ? { tools } : {}),
 			...(metadata?.tool_choice ? { tool_choice: metadata.tool_choice } : {}),
@@ -617,6 +622,68 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 			requestBody.max_output_tokens = this.options.modelMaxTokens || model.info.maxTokens
 		}
 
+		if (!useStreaming) {
+			// Non-streaming path: await the full response then yield results
+			try {
+				const response = await (this.client as any).responses.create({
+					...requestBody,
+					stream: false,
+				})
+
+				// Extract text, tool calls, and reasoning from response output
+				if (Array.isArray(response?.output)) {
+					for (const outputItem of response.output) {
+						if (outputItem?.type === "function_call" || outputItem?.type === "tool_call") {
+							const callId = outputItem.call_id || outputItem.tool_call_id || outputItem.id
+							const name = outputItem.name || outputItem.function?.name
+							const argsRaw = outputItem.arguments || outputItem.function?.arguments || outputItem.input
+							const args =
+								typeof argsRaw === "string"
+									? argsRaw
+									: argsRaw && typeof argsRaw === "object"
+										? JSON.stringify(argsRaw)
+										: ""
+							if (typeof callId === "string" && callId.length > 0 && typeof name === "string" && name.length > 0) {
+								yield { type: "tool_call", id: callId, name, arguments: args }
+							}
+						} else if ((outputItem?.type === "text" || outputItem?.type === "output_text") && outputItem?.text) {
+							yield { type: "text", text: outputItem.text }
+						} else if (outputItem?.type === "message" && Array.isArray(outputItem.content)) {
+							for (const content of outputItem.content) {
+								if ((content?.type === "text" || content?.type === "output_text") && content?.text) {
+									yield { type: "text", text: content.text }
+								}
+							}
+						} else if (outputItem?.type === "reasoning" && outputItem?.summary) {
+							for (const summary of outputItem.summary) {
+								if (summary?.text) {
+									yield { type: "reasoning", text: summary.text }
+								}
+							}
+						}
+					}
+				}
+
+				// Extract usage from non-streaming response
+				const usage = response?.usage
+				if (usage) {
+					yield {
+						type: "usage",
+						inputTokens: usage.input_tokens ?? usage.prompt_tokens ?? 0,
+						outputTokens: usage.output_tokens ?? usage.completion_tokens ?? 0,
+						cacheWriteTokens: usage.cache_creation_input_tokens || undefined,
+						cacheReadTokens: usage.cache_read_input_tokens || undefined,
+					}
+				}
+			} catch (error) {
+				throw handleOpenAIError(error, this.providerName)
+			}
+			return
+		}
+
+		// Streaming path
+		requestBody.stream = true
+
 		// State tracking for streaming
 		let pendingToolCallId: string | undefined
 		let pendingToolCallName: string | undefined