Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 95 additions & 0 deletions apps/vscode-e2e/src/fixtures/subtasks.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
import { LLMock } from "@copilotkit/aimock"
import type { ChatCompletionRequest } from "@copilotkit/aimock"

import { toolResultContains } from "./tool-result"

const SUBTASK_PARENT_MARKER = "SUBTASK_PARENT_CANCELLATION_SMOKE"
const SUBTASK_CHILD_MARKER = "SUBTASK_CHILD_CALCULATOR_SMOKE"

const SUBTASK_CHILD_PROMPT = `${SUBTASK_CHILD_MARKER}: Ask the user exactly this follow-up question: What is the square root of 81? After the user answers, complete with only the answer.`
export const SUBTASK_PARENT_PROMPT = `${SUBTASK_PARENT_MARKER}: Use the new_task tool exactly once. Create an ask-mode subtask with this exact message: "${SUBTASK_CHILD_PROMPT}" Do not answer directly.`
export const SUBTASK_CHILD_FOLLOWUP_ANSWER = "9"

/**
 * Reports whether every string in `expected` occurs somewhere in the
 * JSON-serialized form of the request.
 *
 * The search runs over `JSON.stringify(req)`, so callers must spell special
 * characters the way JSON escapes them (a newline is the two characters `\n`).
 *
 * @param req - Chat completion request to inspect.
 * @param expected - Substrings that must all be present.
 * @returns `true` when no expected substring is missing (including when `expected` is empty).
 */
const requestContains = (req: ChatCompletionRequest, expected: string[]) => {
	const serialized = JSON.stringify(req)

	for (const needle of expected) {
		if (!serialized.includes(needle)) {
			return false
		}
	}

	return true
}

/**
 * Builds the fixture that finishes the child subtask once the follow-up answer
 * is visible in the request.
 *
 * @param followupId - Tool-call ID of the follow-up question whose answer is awaited.
 * @param completionId - Tool-call ID given to the emitted `attempt_completion` call.
 * @returns A fixture whose predicate accepts any of three request shapes and whose
 *   response completes the task with the follow-up answer as its result.
 */
const completionAfterAnswer = (followupId: string, completionId: string) => ({
	match: {
		predicate: (req: ChatCompletionRequest) =>
			// Preferred: structured tool-result message carries the followup answer.
			toolResultContains(req, followupId, [SUBTASK_CHILD_FOLLOWUP_ANSWER]) ||
			// Fallback 1: answer present alongside the tool-call ID but not in a role:tool message.
			requestContains(req, [followupId, SUBTASK_CHILD_FOLLOWUP_ANSWER]) ||
			// Fallback 2: answer arrives as a bare user message after task resume (no tool-call ID context).
			// `\\n` is deliberately a literal backslash + "n": requestContains searches the
			// JSON-serialized request, in which newlines appear in escaped form.
			requestContains(req, [
				SUBTASK_CHILD_MARKER,
				`<user_message>\\n${SUBTASK_CHILD_FOLLOWUP_ANSWER}\\n</user_message>`,
			]),
	},
	response: {
		toolCalls: [
			{
				name: "attempt_completion",
				// Reuse the answer constant so the completion result cannot drift from
				// the answer the predicate above matches on.
				arguments: JSON.stringify({ result: SUBTASK_CHILD_FOLLOWUP_ANSWER }),
				id: completionId,
			},
		],
	},
})

/**
 * Registers the mock LLM fixtures used by the subtask e2e scenarios.
 *
 * Fixtures are added in this order:
 *  1. parent prompt  -> `new_task` call that spawns the ask-mode child
 *  2. child prompt   -> `ask_followup_question` call
 *  3. follow-up answered -> child `attempt_completion` (see `completionAfterAnswer`)
 *  4. result for the parent's `new_task` call -> parent `attempt_completion`
 *
 * @param mock - Mock instance the fixtures are added to.
 */
export function addSubtaskFixtures(mock: InstanceType<typeof LLMock>) {
	// Tool-call IDs, named once so each match/response pair visibly refers to the same call.
	const parentNewTaskCallId = "call_subtasks_parent_new_task_001"
	const childFollowupCallId = "call_subtasks_child_followup_001"
	const childCompletionCallId = "call_subtasks_child_completion_002"
	const parentCompletionCallId = "call_subtasks_parent_completion_003"

	// Builds a response consisting of exactly one tool call with JSON-encoded arguments.
	const singleToolCall = (name: string, args: Record<string, unknown>, id: string) => ({
		toolCalls: [{ name, arguments: JSON.stringify(args), id }],
	})

	const parentMarkerPattern = new RegExp(SUBTASK_PARENT_MARKER)
	mock.addFixture({
		match: { userMessage: parentMarkerPattern },
		response: singleToolCall("new_task", { mode: "ask", message: SUBTASK_CHILD_PROMPT }, parentNewTaskCallId),
	})

	const childMarkerPattern = new RegExp(SUBTASK_CHILD_MARKER)
	mock.addFixture({
		match: { userMessage: childMarkerPattern },
		response: singleToolCall(
			"ask_followup_question",
			{
				question: "What is the square root of 81?",
				follow_up: [{ text: SUBTASK_CHILD_FOLLOWUP_ANSWER }],
			},
			childFollowupCallId,
		),
	})

	mock.addFixture(completionAfterAnswer(childFollowupCallId, childCompletionCallId))

	mock.addFixture({
		match: { toolCallId: parentNewTaskCallId },
		response: singleToolCall("attempt_completion", { result: "Parent task resumed" }, parentCompletionCallId),
	})
}
2 changes: 2 additions & 0 deletions apps/vscode-e2e/src/runTest.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import { addExecuteCommandResultFixtures } from "./fixtures/execute-command"
import { addListFilesResultFixtures } from "./fixtures/list-files"
import { addReadFileResultFixtures } from "./fixtures/read-file"
import { addSearchFilesResultFixtures } from "./fixtures/search-files"
import { addSubtaskFixtures } from "./fixtures/subtasks"
import { addUseMcpToolResultFixtures } from "./fixtures/use-mcp-tool"
import { addWriteToFileResultFixtures } from "./fixtures/write-to-file"

Expand Down Expand Up @@ -110,6 +111,7 @@ async function main() {
addListFilesResultFixtures(mock)
addReadFileResultFixtures(mock)
addSearchFilesResultFixtures(mock)
addSubtaskFixtures(mock)
addUseMcpToolResultFixtures(mock)
addWriteToFileResultFixtures(mock)

Expand Down
226 changes: 172 additions & 54 deletions apps/vscode-e2e/src/suite/subtasks.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,73 +2,191 @@ import * as assert from "assert"

import { RooCodeEventName, type ClineMessage } from "@roo-code/types"

import { sleep, waitFor, waitUntilCompleted } from "./utils"
import { setDefaultSuiteTimeout } from "./test-utils"
import { waitFor, waitUntilCompleted } from "./utils"
import { SUBTASK_CHILD_FOLLOWUP_ANSWER, SUBTASK_PARENT_PROMPT } from "../fixtures/subtasks"

suite.skip("Roo Code Subtasks", () => {
test("Should handle subtask cancellation and resumption correctly", async () => {
suite("Roo Code Subtasks", function () {
setDefaultSuiteTimeout(this)

// Race mitigation: skipDelegationRepair prevents removeClineFromStack from
// auto-resuming the parent when the child is cancelled (Race 2).
//
// Flow: start the parent, wait for the spawned child to ask its follow-up
// question, cancel the child, wait until the child is back on top of the stack
// asking `resume_task`, and only then verify the parent emitted nothing.
test("parent stays paused after subtask cancellation", async () => {
	const api = globalThis.api
	const asks: Record<string, ClineMessage[]> = {}
	const messages: Record<string, ClineMessage[]> = {}

	// Bucket every ask, and every finalized (non-partial) say, by the task that emitted it.
	const messageHandler = ({ taskId, message }: { taskId: string; message: ClineMessage }) => {
		if (message.type === "ask") {
			asks[taskId] = asks[taskId] || []
			asks[taskId].push(message)
		}
		if (message.type === "say" && message.partial === false) {
			messages[taskId] = messages[taskId] || []
			messages[taskId].push(message)
		}
	}

	api.on(RooCodeEventName.Message, messageHandler)

	try {
		const parentTaskId = await api.startNewTask({
			configuration: {
				mode: "ask",
				alwaysAllowModeSwitch: true,
				alwaysAllowSubtasks: true,
				autoApprovalEnabled: true,
				enableCheckpoints: false,
			},
			text: SUBTASK_PARENT_PROMPT,
		})

		// The child is identified as whatever task sits on top of the stack once
		// that top entry is no longer the parent.
		let spawnedTaskId: string | undefined
		await waitFor(() => {
			const stack = api.getCurrentTaskStack()
			const current = stack[stack.length - 1]
			if (current && current !== parentTaskId) {
				spawnedTaskId = current
				return true
			}
			return false
		})

		// Cancel only once the child is blocked on its follow-up question.
		await waitFor(
			() => asks[spawnedTaskId!]?.some(({ type, ask }) => type === "ask" && ask === "followup") ?? false,
		)

		await api.cancelCurrentTask()

		await waitFor(() => api.getCurrentTaskStack().at(-1) === spawnedTaskId)
		await waitFor(
			() => asks[spawnedTaskId!]?.some(({ type, ask }) => type === "ask" && ask === "resume_task") ?? false,
		)

		// Checked after the cancelled child has settled on `resume_task` rather than
		// immediately after cancelCurrentTask(): a check made right away would likely
		// pass vacuously, before a wrongly resumed parent had time to emit anything.
		assert.ok(
			messages[parentTaskId]?.find(({ type, text }) => type === "say" && text === "Parent task resumed") ===
				undefined,
			"Parent task should not have resumed after subtask cancellation",
		)

		await api.clearCurrentTask()
		// The parent task is still in the stack; drain it so it doesn't leak into the next test.
		await api.clearCurrentTask()
		await waitFor(() => api.getCurrentTaskStack().length === 0)
	} finally {
		// Always detach the listener so later tests do not feed this test's buckets.
		api.off(RooCodeEventName.Message, messageHandler)
	}
})

// Race mitigation: runDelegationTransition lock + cancelledDelegationChildIds guard
// ensures cancelTask() wins over a concurrent reopenParentFromDelegation() (Race 3).
test("cancelled child completes in-place and does not reopen parent", async () => {
const api = globalThis.api
const asks: Record<string, ClineMessage[]> = {}
const messages: Record<string, ClineMessage[]> = {}

api.on(RooCodeEventName.Message, ({ taskId, message }) => {
const messageHandler = ({ taskId, message }: { taskId: string; message: ClineMessage }) => {
if (message.type === "ask") {
asks[taskId] = asks[taskId] || []
asks[taskId].push(message)
}
if (message.type === "say" && message.partial === false) {
messages[taskId] = messages[taskId] || []
messages[taskId].push(message)
}
})

const childPrompt = "You are a calculator. Respond only with numbers. What is the square root of 9?"

// Start a parent task that will create a subtask.
const parentTaskId = await api.startNewTask({
configuration: {
mode: "ask",
alwaysAllowModeSwitch: true,
alwaysAllowSubtasks: true,
autoApprovalEnabled: true,
enableCheckpoints: false,
},
text:
"You are the parent task. " +
`Create a subtask by using the new_task tool with the message '${childPrompt}'.` +
"After creating the subtask, wait for it to complete and then respond 'Parent task resumed'.",
})

let spawnedTaskId: string | undefined = undefined

// Wait for the subtask to be spawned and then cancel it.
api.on(RooCodeEventName.TaskSpawned, (_, childTaskId) => (spawnedTaskId = childTaskId))
await waitFor(() => !!spawnedTaskId)
await sleep(1_000) // Give the task a chance to start and populate the history.
await api.cancelCurrentTask()

// Wait a bit to ensure any task resumption would have happened.
await sleep(2_000)

// The parent task should not have resumed yet, so we shouldn't see
// "Parent task resumed".
assert.ok(
messages[parentTaskId]?.find(({ type, text }) => type === "say" && text === "Parent task resumed") ===
undefined,
"Parent task should not have resumed after subtask cancellation",
)
}

const findCompletionText = (taskId: string) =>
messages[taskId]
?.filter(
(message) =>
message.type === "say" && (message.say === "completion_result" || message.say === "text"),
)
.map((message) => message.text?.trim())
.find((text): text is string => !!text)

const findErrorText = (taskId: string) =>
messages[taskId]
?.filter((message) => message.type === "say" && message.say === "error")
.map((message) => message.text?.trim())
.find((text): text is string => !!text)

api.on(RooCodeEventName.Message, messageHandler)

try {
const parentTaskId = await api.startNewTask({
configuration: {
mode: "ask",
alwaysAllowModeSwitch: true,
alwaysAllowSubtasks: true,
autoApprovalEnabled: true,
enableCheckpoints: false,
},
text: SUBTASK_PARENT_PROMPT,
})

let spawnedTaskId: string | undefined
await waitFor(() => {
const stack = api.getCurrentTaskStack()
const current = stack[stack.length - 1]
if (current && current !== parentTaskId) {
spawnedTaskId = current
return true
}
return false
})

await waitFor(
() => asks[spawnedTaskId!]?.some(({ type, ask }) => type === "ask" && ask === "followup") ?? false,
)

const cancelledChildTaskId = spawnedTaskId!
await api.cancelCurrentTask()

// Start a new task with the same message as the subtask.
const anotherTaskId = await api.startNewTask({ text: childPrompt })
await waitUntilCompleted({ api, taskId: anotherTaskId })
await waitFor(() => api.getCurrentTaskStack().at(-1) === cancelledChildTaskId)
await waitFor(
() =>
asks[cancelledChildTaskId]?.some(({ type, ask }) => type === "ask" && ask === "resume_task") ??
false,
)

// Wait a bit to ensure any task resumption would have happened.
await sleep(2_000)
const resumedChildTaskId = await waitUntilCompleted({
api,
start: async () => {
await api.sendMessage(SUBTASK_CHILD_FOLLOWUP_ANSWER)
return cancelledChildTaskId
},
})

// The parent task should still not have resumed.
assert.ok(
messages[parentTaskId]?.find(({ type, text }) => type === "say" && text === "Parent task resumed") ===
assert.strictEqual(
resumedChildTaskId,
cancelledChildTaskId,
"Cancelled child task should be resumed in place",
)
assert.strictEqual(
findErrorText(resumedChildTaskId),
undefined,
"Parent task should not have resumed after subtask cancellation",
)
"Resumed child task should not emit an error",
)
assert.strictEqual(
findCompletionText(resumedChildTaskId),
"9",
"Resumed child task should complete with `9`",
)
assert.strictEqual(
api.getCurrentTaskStack().at(-1),
cancelledChildTaskId,
"Cancelled child task should remain the active completed task",
)
assert.ok(
messages[parentTaskId]?.find(({ type, text }) => type === "say" && text === "Parent task resumed") ===
undefined,
"Parent task should not have resumed after the cancelled child completed",
)

// Clean up - cancel all tasks.
await api.clearCurrentTask()
await waitUntilCompleted({ api, taskId: parentTaskId })
await api.clearCurrentTask()
} finally {
api.off(RooCodeEventName.Message, messageHandler)
}
})
})
6 changes: 3 additions & 3 deletions apps/vscode-e2e/src/suite/tools/apply-diff.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ suite("Roo Code apply_diff Tool", function () {

suiteTeardown(async () => {
try {
await globalThis.api.cancelCurrentTask()
await globalThis.api.clearCurrentTask()
} catch {
// Task might not be running
}
Expand All @@ -147,7 +147,7 @@ suite("Roo Code apply_diff Tool", function () {

setup(async () => {
try {
await globalThis.api.cancelCurrentTask()
await globalThis.api.clearCurrentTask()
} catch {
// Task might not be running
}
Expand All @@ -164,7 +164,7 @@ suite("Roo Code apply_diff Tool", function () {

teardown(async () => {
try {
await globalThis.api.cancelCurrentTask()
await globalThis.api.clearCurrentTask()
} catch {
// Task might not be running
}
Expand Down
Loading
Loading