From 3a7f238cb47ce0ea706cf3e4fc62ab49df2ceafe Mon Sep 17 00:00:00 2001 From: Rob Lourens Date: Tue, 18 Nov 2025 16:03:57 -0800 Subject: [PATCH] Fix using the wrong runTest tool reference for prompt hints (#2066) * Fix using the wrong runTest tool reference * Update test snapshot --- .../copilot/src/extension/intents/node/agentIntent.ts | 2 +- .../prompts/node/agent/openai/gpt51CodexPrompt.tsx | 2 +- .../extension/prompts/node/agent/openai/gpt51Prompt.tsx | 2 +- .../prompts/node/agent/openai/gpt5CodexPrompt.tsx | 2 +- .../copilot/src/extension/tools/common/toolNames.ts | 2 -- .../src/extension/tools/node/test/testFailure.spec.tsx | 8 ++++---- .../copilot/src/extension/tools/node/testFailureTool.tsx | 4 ++-- 7 files changed, 10 insertions(+), 12 deletions(-) diff --git a/extensions/copilot/src/extension/intents/node/agentIntent.ts b/extensions/copilot/src/extension/intents/node/agentIntent.ts index 854715c5996..f3213344b05 100644 --- a/extensions/copilot/src/extension/intents/node/agentIntent.ts +++ b/extensions/copilot/src/extension/intents/node/agentIntent.ts @@ -90,7 +90,7 @@ export const getAgentTools = (instaService: IInstantiationService, request: vsco } } - allowTools[ToolName.RunTests] = await testService.hasAnyTests(); + allowTools[ToolName.CoreRunTest] = await testService.hasAnyTests(); allowTools[ToolName.CoreRunTask] = tasksService.getTasks().length > 0; if (model.family === 'gpt-5-codex' || model.family.includes('grok-code')) { diff --git a/extensions/copilot/src/extension/prompts/node/agent/openai/gpt51CodexPrompt.tsx b/extensions/copilot/src/extension/prompts/node/agent/openai/gpt51CodexPrompt.tsx index ff0760d6773..4bfe58a37b7 100644 --- a/extensions/copilot/src/extension/prompts/node/agent/openai/gpt51CodexPrompt.tsx +++ b/extensions/copilot/src/extension/prompts/node/agent/openai/gpt51CodexPrompt.tsx @@ -34,7 +34,7 @@ class Gpt51CodexPrompt extends PromptElement { - You have access to many tools. If a tool exists to perform a specific task, you MUST use that tool instead of running a terminal command to perform that task.
- {tools[ToolName.RunTests] && <>- Use the {ToolName.RunTests} tool to run tests instead of running terminal commands.
} + {tools[ToolName.CoreRunTest] && <>- Use the {ToolName.CoreRunTest} tool to run tests instead of running terminal commands.
} {tools[ToolName.CoreManageTodoList] && <>
## {ToolName.CoreManageTodoList} tool
diff --git a/extensions/copilot/src/extension/prompts/node/agent/openai/gpt51Prompt.tsx b/extensions/copilot/src/extension/prompts/node/agent/openai/gpt51Prompt.tsx index 2d9d1d3292b..6e281810408 100644 --- a/extensions/copilot/src/extension/prompts/node/agent/openai/gpt51Prompt.tsx +++ b/extensions/copilot/src/extension/prompts/node/agent/openai/gpt51Prompt.tsx @@ -158,7 +158,7 @@ class Gpt51Prompt extends PromptElement { - Do not use one-letter variable names unless explicitly requested.
- NEVER output inline citations like "【F:README.md†L5-L14】" in your outputs. The UI is not able to render these so they will just be broken in the UI. Instead, if you output valid filepaths, users will be able to click on them to open the files in their editor.
- You have access to many tools. If a tool exists to perform a specific task, you MUST use that tool instead of running a terminal command to perform that task.
- {tools[ToolName.RunTests] && <>- Use the {ToolName.RunTests} tool to run tests instead of running terminal commands.
} + {tools[ToolName.CoreRunTest] && <>- Use the {ToolName.CoreRunTest} tool to run tests instead of running terminal commands.
}
If the codebase has tests or the ability to build or run, consider using them to verify changes once your work is complete.
diff --git a/extensions/copilot/src/extension/prompts/node/agent/openai/gpt5CodexPrompt.tsx b/extensions/copilot/src/extension/prompts/node/agent/openai/gpt5CodexPrompt.tsx index 1946ae24aa6..a65898e902b 100644 --- a/extensions/copilot/src/extension/prompts/node/agent/openai/gpt5CodexPrompt.tsx +++ b/extensions/copilot/src/extension/prompts/node/agent/openai/gpt5CodexPrompt.tsx @@ -29,7 +29,7 @@ class CodexStyleGpt5CodexPrompt extends PromptElement {
## Tool use
- You have access to many tools. If a tool exists to perform a specific task, you MUST use that tool instead of running a terminal command to perform that task.
- {tools[ToolName.RunTests] && <>- Use the {ToolName.RunTests} tool to run tests instead of running terminal commands.
} + {tools[ToolName.CoreRunTest] && <>- Use the {ToolName.CoreRunTest} tool to run tests instead of running terminal commands.
} {tools[ToolName.CoreManageTodoList] && <>
## {ToolName.CoreManageTodoList} tool
diff --git a/extensions/copilot/src/extension/tools/common/toolNames.ts b/extensions/copilot/src/extension/tools/common/toolNames.ts index a057a6a595d..a6601c45a66 100644 --- a/extensions/copilot/src/extension/tools/common/toolNames.ts +++ b/extensions/copilot/src/extension/tools/common/toolNames.ts @@ -23,7 +23,6 @@ export enum ToolName { Codebase = 'semantic_search', VSCodeAPI = 'get_vscode_api', TestFailure = 'test_failure', - RunTests = 'run_tests', FindFiles = 'file_search', FindTextInFiles = 'grep_search', ReadFile = 'read_file', @@ -211,7 +210,6 @@ export const toolCategories: Record = { [ToolName.CoreTerminalLastCommand]: ToolCategory.VSCodeInteraction, // Testing - [ToolName.RunTests]: ToolCategory.Testing, [ToolName.TestFailure]: ToolCategory.Testing, [ToolName.FindTestFiles]: ToolCategory.Testing, [ToolName.CoreRunTest]: ToolCategory.Testing, diff --git a/extensions/copilot/src/extension/tools/node/test/testFailure.spec.tsx b/extensions/copilot/src/extension/tools/node/test/testFailure.spec.tsx index 253b93f2c07..eaa8cb7b89a 100644 --- a/extensions/copilot/src/extension/tools/node/test/testFailure.spec.tsx +++ b/extensions/copilot/src/extension/tools/node/test/testFailure.spec.tsx @@ -91,7 +91,7 @@ suite('TestFailureTool', () => { test('returns a message when no failures exist', async () => { failures = []; const result = await resolver.invoke({ input: {}, toolInvocationToken: '' as any }); - expect(await toolResultToString(accessor, result)).toMatchInlineSnapshot(`"No test failures were found yet, call the tool run_tests to run tests and find failures."`); + expect(await toolResultToString(accessor, result)).toMatchInlineSnapshot(`"No test failures were found yet, call the tool runTests to run tests and find failures."`); }); test('formats stack frames', async () => { @@ -129,7 +129,7 @@ suite('TestFailureTool', () => { ## Rules: - Always try to find an error in the implementation code first. Don't suggest any changes in my test cases unless I tell you to. - If you need more information about anything in the codebase, use a tool like read_file, list_dir, or file_search to find and read it. Never ask the user to provide it themselves. - - If you make changes to fix the test, call run_tests to run the tests and verify the fix. + - If you make changes to fix the test, call runTests to run the tests and verify the fix. - Don't try to make the same changes you made before to fix the test. If you're stuck, ask the user for pointers. " `); @@ -165,7 +165,7 @@ suite('TestFailureTool', () => { ## Rules: - Always try to find an error in the implementation code first. Don't suggest any changes in my test cases unless I tell you to. - If you need more information about anything in the codebase, use a tool like read_file, list_dir, or file_search to find and read it. Never ask the user to provide it themselves. - - If you make changes to fix the test, call run_tests to run the tests and verify the fix. + - If you make changes to fix the test, call runTests to run the tests and verify the fix. - Don't try to make the same changes you made before to fix the test. If you're stuck, ask the user for pointers. " `); @@ -264,7 +264,7 @@ suite('TestFailureTool', () => { ## Rules: - Always try to find an error in the implementation code first. Don't suggest any changes in my test cases unless I tell you to. - If you need more information about anything in the codebase, use a tool like read_file, list_dir, or file_search to find and read it. Never ask the user to provide it themselves. - - If you make changes to fix the test, call run_tests to run the tests and verify the fix. + - If you make changes to fix the test, call runTests to run the tests and verify the fix. - Don't try to make the same changes you made before to fix the test. If you're stuck, ask the user for pointers. " `); diff --git a/extensions/copilot/src/extension/tools/node/testFailureTool.tsx b/extensions/copilot/src/extension/tools/node/testFailureTool.tsx index 887ca231119..4179140fb4f 100644 --- a/extensions/copilot/src/extension/tools/node/testFailureTool.tsx +++ b/extensions/copilot/src/extension/tools/node/testFailureTool.tsx @@ -58,7 +58,7 @@ export class TestFailureTool implements ICopilotTool<{}> { const failures = Array.from(this.testProvider.getAllFailures()); if (failures.length === 0) { return new LanguageModelToolResult([ - new LanguageModelTextPart(`No test failures were found yet, call the tool ${ToolName.RunTests} to run tests and find failures.`), + new LanguageModelTextPart(`No test failures were found yet, call the tool ${ToolName.CoreRunTest} to run tests and find failures.`), ]); } @@ -118,7 +118,7 @@ export class TestFailureList extends PromptElement ## Rules:
- Always try to find an error in the implementation code first. Don't suggest any changes in my test cases unless I tell you to.
- If you need more information about anything in the codebase, use a tool like {ToolName.ReadFile}, {ToolName.ListDirectory}, or {ToolName.FindFiles} to find and read it. Never ask the user to provide it themselves.
- - If you make changes to fix the test, call {ToolName.RunTests} to run the tests and verify the fix.
+ - If you make changes to fix the test, call {ToolName.CoreRunTest} to run the tests and verify the fix.
- Don't try to make the same changes you made before to fix the test. If you're stuck, ask the user for pointers.
;