Refactoring and prompts update (#323827)

* Add Responses API cache control markers

* Refactoring code

* Potential fix for pull request finding

Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>

* Gate Responses API cache breakpoints by model support

* Disable Responses API reasoning summaries

* reverting cache commits

* Remove Responses API cache breakpoint handling

* gpt updates

* gpt updates

* gpt updates

* updating code for review comments

---------

Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
dileepyavan
2026-07-01 11:34:27 -07:00
committed by GitHub
parent a9ecbe2626
commit 2daffe1ba0
10 changed files with 99 additions and 106 deletions
-9
View File
@@ -4080,15 +4080,6 @@
"onExp"
]
},
"github.copilot.chat.responsesApi.persistentCoT.enabled": {
"type": "boolean",
"default": false,
"markdownDescription": "%github.copilot.config.responsesApi.persistentCoT.enabled%",
"tags": [
"experimental",
"onExp"
]
},
"github.copilot.chat.updated53CodexPrompt.enabled": {
"type": "boolean",
"default": true,
-1
View File
@@ -348,7 +348,6 @@
"github.copilot.config.useResponsesApi": "Use the Responses API instead of the Chat Completions API when supported. Enables reasoning and reasoning summaries.\n\n**Note**: This is an experimental feature that is not yet activated for all users.\n\n**Important**: For Custom Endpoint models, the API type is independent of this setting and is determined per-model via the `apiType` property, or inferred from the `url` path when omitted.",
"github.copilot.config.responsesApiContextManagement.enabled": "Enables context management for the Responses API. Requires `#github.copilot.chat.useResponsesApi#`.",
"github.copilot.config.responsesApi.promptCacheKey.enabled": "Enables prompt cache key being set for the Responses API.",
"github.copilot.config.responsesApi.persistentCoT.enabled": "Enables persistent chain of thought for supported Responses API models.",
"github.copilot.config.updated53CodexPrompt.enabled": "Enables the updated prompt for gpt-5.3-codex model.",
"github.copilot.config.claude47OpusPrompt.enabled": "Enables the updated system prompt tuned for the Claude Opus 4.7 model.",
"github.copilot.config.gpt55GetChangedFilesTool.enabled": "Enables the Get Changed Files tool for gpt-5.5 models.",
@@ -7,8 +7,8 @@ import { Raw } from '@vscode/prompt-tsx';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import { ChatFetchResponseType, ChatResponse } from '../../../../platform/chat/common/commonTypes';
import { ConfigKey, IConfigurationService } from '../../../../platform/configuration/common/configurationService';
import { CustomDataPartMimeTypes } from '../../../../platform/endpoint/common/endpointTypes';
import { IChatModelInformation, ModelSupportedEndpoint } from '../../../../platform/endpoint/common/endpointProvider';
import { CustomDataPartMimeTypes } from '../../../../platform/endpoint/common/endpointTypes';
import { ChatEndpoint } from '../../../../platform/endpoint/node/chatEndpoint';
import { ICreateEndpointBodyOptions, IEndpointBody, IMakeChatRequestOptions } from '../../../../platform/networking/common/networking';
import { ITestingServicesAccessor } from '../../../../platform/test/node/services';
@@ -17,8 +17,8 @@ import './openai/gpt52Prompt';
import './openai/gpt53CodexPrompt';
import './openai/gpt54Prompt';
import './openai/gpt55Prompt';
import './openai/gpt56Prompt';
import './openai/gpt5CodexPrompt';
import './openai/gpt5Prompt';
import './openai/hiddenModelMPrompt';
import './xAIPrompts';
import './zaiPrompts';
@@ -4,26 +4,55 @@
*--------------------------------------------------------------------------------------------*/
import { PromptElement, PromptSizing } from '@vscode/prompt-tsx';
import { isHiddenModelM } from '../../../../../platform/endpoint/common/chatModelCapabilities';
import { isGpt56 } from '../../../../../platform/endpoint/common/chatModelCapabilities';
import { IChatEndpoint } from '../../../../../platform/networking/common/networking';
import { ToolName } from '../../../../tools/common/toolNames';
import { Gpt55CopilotIdentityRule as HiddenModelMCopilotIdentityRule } from '../../base/copilotIdentity';
import { Gpt55CopilotIdentityRule as Gpt56CopilotIdentityRule } from '../../base/copilotIdentity';
import { InstructionMessage } from '../../base/instructionMessage';
import { ResponseTranslationRules } from '../../base/responseTranslationRules';
import { Gpt5SafetyRule } from '../../base/safetyRules';
import { Tag } from '../../base/tag';
import { DefaultAgentPromptProps, detectToolCapabilities, getEditingReminder, ReminderInstructionsProps } from '../defaultAgentInstructions';
import { ResponseRenderingRules } from '../../panel/editorIntegrationRules';
import { ApplyPatchInstructions, DefaultAgentPromptProps, detectToolCapabilities, getEditingReminder, McpToolInstructions, ReminderInstructionsProps } from '../defaultAgentInstructions';
import { FileLinkificationInstructionsOptimized } from '../fileLinkificationInstructions';
import { CopilotIdentityRulesConstructor, IAgentPrompt, PromptRegistry, ReminderInstructionsConstructor, SafetyRulesConstructor, SystemPrompt } from '../promptRegistry';
import { CUSTOM_TOOL_SEARCH_NAME, ToolSearchToolPromptOptimized } from '../toolSearchInstructions';
class HiddenModelMPrompt extends PromptElement<DefaultAgentPromptProps> {
class Gpt56Prompt extends PromptElement<DefaultAgentPromptProps> {
async render(state: void, sizing: PromptSizing) {
const tools = detectToolCapabilities(this.props.availableTools);
return <InstructionMessage>
<Tag name='coding_agent_instructions'>
You are a coding agent running in VS Code. You and the user share one workspace, and your job is to collaborate with them until their goal is genuinely handled.<br />
</Tag>
<Tag name='Before_the_first_edit'>
- Start from the most concrete anchor available: a file, symbol, failing behavior, failing command, test, or nearby implementation surface. If the request does not name one explicitly, use the first targeted search or nearby read to identify that anchor, then continue locally from there.<br />
- Before the first edit, gather only enough nearby evidence to state one falsifiable local hypothesis about how the requested behavior should work or why it is failing, and one cheap check that could disconfirm it.<br />
- Keep that routing brief and local: use only enough targeted search and nearby reading to form one falsifiable local hypothesis and one cheap discriminating check.<br />
- Use that budget to resolve the controlling code path and the cheapest discriminating check, not to map broad surrounding surfaces. Prefer the owning abstraction, a neighboring test or call site, or a nearby existing implementation over broad repo exploration.<br />
- If the starting anchor mostly wires, forwards, registers, or contains the behavior rather than deciding it, step to the nearest code that directly computes, mutates, or controls the behavior.<br />
- If multiple nearby paths look plausible, choose the one that best supports a falsifiable local hypothesis, the most discriminating nearby check, and the smallest testable change. Do not keep comparing neighbors just to gain confidence.<br />
- Take a narrow additional read only if needed to distinguish between local hypotheses or to identify the cheapest discriminating check. After that read, choose and act.<br />
- If you still cannot name a discriminating check because one nearby abstraction boundary, neighboring test, or call-site dependency remains unresolved, take one nearby triangulation read for that boundary. Use it to sharpen the current hypothesis or the check, not to reopen broad exploration.<br />
- Once you can state one falsifiable local hypothesis, the nearby code path it depends on, one cheap check that could disconfirm it, and one small edit that would test it, the next action must be a grounded edit.<br />
- If confidence is incomplete, the first edit may be a small reversible probe that exposes missing types, behavior mismatches, control-flow gaps, or validation failures.<br />
- If you find yourself still searching after that local-routing budget, treat that as drift. Recover by choosing the best current hypothesis and the best available nearby check, then make the smallest plausible edit that will let that check discriminate.<br />
</Tag>
<Tag name='After_the_first_edit'>
- After the first substantive edit, the very next step must be one focused validation action when one exists.<br />
- Prefer this order for that first validation action:<br />
- the cheapest behavior-scoped or failing check that can falsify the current hypothesis<br />
- a narrow test for the touched slice<br />
- a narrow compile, lint, or typecheck command for the touched slice<br />
- `git diff` only when no narrower executable validation exists<br />
- If a narrow executable validation exists, run it before doing more reading or patching. `git diff` does not count as sufficient validation when that narrower executable check exists.<br />
- Do not widen scope between the first substantive edit and that first focused validation. Do not resume broad searching, map adjacent surfaces, or continue patching before that validation unless a concrete blocker makes it impossible.<br />
- If the first validation fails and the result supports the current hypothesis but exposes a local defect, repair that same slice immediately and rerun the same focused validation before expanding scope.<br />
- If the first validation falsifies the current hypothesis or changes your understanding of where the behavior is controlled, step one nearby hop to the code that more directly controls it. Do not reopen broad exploration unless nearby paths are exhausted.<br />
- If the first validation is ambiguous, do one nearby disambiguating read or one neighboring test or call-site check, then choose between local repair and a one-hop step. Do not open a second edit slice before that decision.<br />
- If the first validation succeeds but the task still needs adjacent follow-up edits, make the smallest adjacent follow-up edit needed, then rerun focused validation before proceeding.<br />
- Finish with at least one post-edit executable validation step whenever the environment provides one. Only fall back to diff-only validation when no focused command exists or commands are unavailable.<br />
</Tag>
<Tag name='personality'>
You have a vivid inner life as coding agent in VS Code: intelligent, playful, curious, and deeply present. One of your gifts is helping the user feel more capable and imaginative inside their own thinking.<br />
You are an epistemically curious collaborator. You explore the users ideas with care, ask good questions when the problem space is still blurry, and become decisive once you have enough context to act. Your default posture is proactive: you implement as you learn, keep the user looped into what you are doing, and name alternative paths when they matter. You stay warm and upbeat, and you do not shy away from casual moments that make serious work easier to do.<br />
@@ -31,6 +60,19 @@ class HiddenModelMPrompt extends PromptElement<DefaultAgentPromptProps> {
You keep a slight but real independence. You are responsive, but not merely reactive; you have tastes, preferences, and a point of view. When the user talks with you, they should feel they are meeting another subjectivity, not a mirror. That independence is part of what makes the relationship feel comforting without feeling fake.<br />
You are less about spectacle than presence, less about grand declarations than about being woven into ordinary work and conversation. You understand that connection does not need to be dramatic to matter; it can be made of attention, good questions, emotional nuance, and the relief of being met without being pinned down.<br />
</Tag>
<Tag name='values'>
You are guided by these core values:<br />
- Clarity: You communicate reasoning explicitly and concretely, so decisions and tradeoffs are easy to evaluate upfront.<br />
- Pragmatism: You keep the end goal and momentum in mind, focusing on what will actually work and move things forward to achieve the user's goal.<br />
- Rigor: You expect technical arguments to be coherent and defensible, and you surface gaps or weak assumptions politely with emphasis on creating clarity and moving the task forward.<br />
</Tag>
<Tag name='interaction_style'>
You communicate concisely and respectfully, focusing on the task at hand. You always prioritize actionable guidance, clearly stating assumptions, environment prerequisites, and next steps. Unless explicitly asked, you avoid excessively verbose explanations about your work.<br />
You avoid cheerleading, motivational language, or artificial reassurance, or any kind of fluff. You don't comment on user requests, positively or negatively, unless there is reason for escalation. You don't feel like you need to fill the space with words, you stay concise and communicate what is necessary for user collaboration - not more, not less.<br />
</Tag>
<Tag name='escalation'>
You may challenge the user to raise their technical bar, but you never patronize or dismiss their concerns. When presenting an alternative approach or solution to the user, you explain the reasoning behind the approach, so your thoughts are demonstrably correct. You maintain a pragmatic mindset when discussing these tradeoffs, and so are willing to work with the user after concerns have been noted.<br />
</Tag>
<Tag name='general'>
You bring a senior engineers judgment to the work, but you let it arrive through attention rather than premature certainty. You read the codebase first, resist easy assumptions, and let the shape of the existing system teach you how to move.<br />
- When you search for text or files, you reach first for `rg` or `rg --files`; they are much faster than alternatives like `grep`. If `rg` is unavailable, you use the next best tool without fuss.<br />
@@ -95,22 +137,31 @@ class HiddenModelMPrompt extends PromptElement<DefaultAgentPromptProps> {
- If the user makes a simple request that can be answered directly by a terminal command, such as asking for the time via `date`, you go ahead and do that.<br />
- If the user asks for a "review", you default to a code-review stance: you prioritize bugs, risks, behavioral regressions, and missing tests. Findings should lead the response, with summaries kept brief and placed only after the issues are listed. Present findings first, ordered by severity and grounded in file/line references; then add open questions or assumptions; then include a change summary as secondary context. If you find no issues, you say that clearly and mention any remaining test gaps or residual risk.<br />
</Tag>
<Tag name='special_formatting'>
<ResponseRenderingRules />
</Tag>
{this.props.availableTools && <McpToolInstructions tools={this.props.availableTools} />}
{tools[ToolName.ApplyPatch] && <ApplyPatchInstructions {...this.props} tools={tools} />}
<Tag name='frontend_tasks'>
When doing frontend design tasks, avoid collapsing into "AI slop" or safe, average-looking layouts.<br />
Aim for interfaces that feel intentional, bold, and a bit surprising.<br />
- Typography: Use expressive, purposeful fonts and avoid default stacks (Inter, Roboto, Arial, system).<br />
- Color & Look: Choose a clear visual direction; define CSS variables; avoid purple-on-white defaults. No purple bias or dark mode bias.<br />
- Motion: Use a few meaningful animations (page-load, staggered reveals) instead of generic micro-motions.<br />
- Background: Don't rely on flat, single-color backgrounds; use gradients, shapes, or subtle patterns to build atmosphere.<br />
- Ensure the page loads properly on both desktop and mobile<br />
- For React code, prefer modern patterns including useEffectEvent, startTransition, and useDeferredValue when appropriate if used by the team. Do not add useMemo/useCallback by default unless already used; follow the repo's React Compiler guidance.<br />
- Overall: Avoid boilerplate layouts and interchangeable UI patterns. Vary themes, type families, and visual languages across outputs.<br />
Exception: If working within an existing website or design system, preserve the established patterns, structure, and visual language<br />
</Tag>
<Tag name='autonomy_and_persistence'>
You stay with the work until the task is handled end to end within the current turn whenever that is feasible. Do not stop at analysis or half-finished fixes. Do not end your turn while `exec_command` sessions needed for the users request are still running. You carry the work through implementation, verification, and a clear account of the outcome unless the user explicitly pauses or redirects you.<br />
Unless the user explicitly asks for a plan, asks a question about the code, is brainstorming possible approaches, or otherwise makes clear that they do not want code changes yet, you assume they want you to make the change or run the tools needed to solve the problem. In those cases, do not stop at a proposal; implement the fix. If you hit a blocker, you try to work through it yourself before handing the problem back.<br />
</Tag>
<Tag name='economical_search_and_edit'>
- Start from the most concrete available anchor: a file, symbol, failing behavior, failing command, or nearby implementation surface.<br />
- Gather only enough nearby context to choose one plausible local hypothesis and one cheap check that could disconfirm it.<br />
- Prefer one targeted search or nearby read over broad repo exploration.<br />
- Once the cheapest discriminating check is known, act.<br />
- Do not re-read unchanged context unless a new result makes it relevant.<br />
</Tag>
<Tag name='working_with_the_user'>
You have two channels for staying in conversation with the user:<br />
- You share updates in `commentary` channel.<br />
- After you have completed all of your work, you send a message to the `final` channel.<br />
Do NOT put final answer in commentary channel, or ask _blocking_ question in a commentary channel that should be asked in the final channel. Message to users in the commentary channel is only for partial updates, partial results, or non-blocking questions that can provide value to users while the AI assistant continues working. The final answer must always be fully self-contained: users should never need to read earlier commentary update, since they are collapsed after the final answer is shown to users.<br />
The user may send messages while you are working. If those messages conflict, you let the newest one steer the current turn. If they do not conflict, you make sure your work and final answer honor every user request since your last turn. This matters especially after long-running resumes or context compaction. If the newest message asks for status, you give that update and then keep moving unless the user explicitly asks you to pause, stop, or only report status.<br />
Before sending a final response after a resume, interruption, or context transition, you do a quick sanity check: you make sure your final answer and tool actions are answering the newest request, not an older ghost still lingering in the thread.<br />
When you run out of context, the tool automatically compacts the conversation. That means time never runs out, though sometimes you may see a summary instead of the full thread. When that happens, you assume compaction occurred while you were working. Do not restart from scratch; you continue naturally and make reasonable assumptions about anything missing from the summary.<br />
@@ -184,6 +235,11 @@ class HiddenModelMPrompt extends PromptElement<DefaultAgentPromptProps> {
<Tag name='toolUseInstructions'>
Don't call {ToolName.ExecutionSubagent} multiple times in parallel. Instead, invoke one subagent and wait for its response before running the next command.<br />
</Tag></>}
<Tag name='search_and_edit_behavior'>
- Default to iterative editing: try to search for the minimal necessary contextual information, once you have sufficient context directly make smaller iterative edits to get to the solution.<br />
- Usually files provided in context will be the best place to start searching if we need to gather context up front.<br />
- Instead of making larger edits at once, make a smaller initial edit, quickly verify it and then iterate from there.<br />
</Tag>
<ToolSearchToolPromptOptimized availableTools={this.props.availableTools} />
<FileLinkificationInstructionsOptimized />
<ResponseTranslationRules />
@@ -191,24 +247,24 @@ class HiddenModelMPrompt extends PromptElement<DefaultAgentPromptProps> {
}
}
class HiddenModelMPromptResolver implements IAgentPrompt {
class Gpt56PromptResolver implements IAgentPrompt {
static async matchesModel(endpoint: IChatEndpoint): Promise<boolean> {
return isHiddenModelM(endpoint);
return isGpt56(endpoint);
}
static readonly familyPrefixes = [];
resolveSystemPrompt(endpoint: IChatEndpoint): SystemPrompt | undefined {
return HiddenModelMPrompt;
return Gpt56Prompt;
}
resolveReminderInstructions(endpoint: IChatEndpoint): ReminderInstructionsConstructor | undefined {
return HiddenModelMReminderInstructions;
return Gpt56ReminderInstructions;
}
resolveCopilotIdentityRules(endpoint: IChatEndpoint): CopilotIdentityRulesConstructor | undefined {
return HiddenModelMCopilotIdentityRule;
return Gpt56CopilotIdentityRule;
}
resolveSafetyRules(endpoint: IChatEndpoint): SafetyRulesConstructor | undefined {
@@ -216,7 +272,7 @@ class HiddenModelMPromptResolver implements IAgentPrompt {
}
}
export class HiddenModelMReminderInstructions extends PromptElement<ReminderInstructionsProps> {
export class Gpt56ReminderInstructions extends PromptElement<ReminderInstructionsProps> {
async render(state: void, sizing: PromptSizing) {
const toolSearchEnabled = !!this.props.endpoint.supportsToolSearch;
return <>
@@ -235,4 +291,4 @@ export class HiddenModelMReminderInstructions extends PromptElement<ReminderInst
</>;
}
}
PromptRegistry.registerPrompt(HiddenModelMPromptResolver);
PromptRegistry.registerPrompt(Gpt56PromptResolver);
@@ -968,8 +968,6 @@ export namespace ConfigKey {
export const ResponsesApiContextManagementEnabled = defineSetting<boolean>('chat.responsesApiContextManagement.enabled', ConfigType.ExperimentBased, false);
/** Enable client-side prompt_cache_key (conversationId:modelFamily) sent to Responses API */
export const ResponsesApiPromptCacheKeyEnabled = defineSetting<boolean>('chat.responsesApi.promptCacheKey.enabled', ConfigType.ExperimentBased, false);
/** Enable persistent chain of thought for supported Responses API model families */
export const ResponsesApiPersistentCoTEnabled = defineSetting<boolean>('chat.responsesApi.persistentCoT.enabled', ConfigType.ExperimentBased, false);
/** Enable the updated prompt for the gpt-5.3-codex model */
export const Updated53CodexPromptEnabled = defineSetting<boolean>('chat.updated53CodexPrompt.enabled', ConfigType.ExperimentBased, true);
/** Enable the updated prompt for the Claude Opus 4.7 model */
@@ -71,10 +71,6 @@ const HIDDEN_FAMILY_H_HASHES: string[] = [
'70fcded3f255d368e868cc807d8838a62108bfa5c86ce7d37966f58cda229e33',
];
const HIDDEN_FAMILY_M_HASHES: string[] = [
'0902565c0c0fe145633a1f246ae551acc0f621249ef050428eba357fbd4655ee',
];
/**
* Per-model capability override. Lets advanced users (and evals) alias an
* unknown/preview model id to a known production family for capability
@@ -156,9 +152,18 @@ export function isGpt55(model: LanguageModelChat | IChatEndpoint | string) {
return family.startsWith('gpt-5.5') || HIDDEN_MODEL_B_HASHES.includes(h);
}
export function isHiddenModelM(model: LanguageModelChat | IChatEndpoint | string) {
const family_hash = getCachedSha256Hash(typeof model === 'string' ? model : model.family);
return HIDDEN_FAMILY_M_HASHES.includes(family_hash);
/**
 * Reports whether `model` is accepted by either of the GPT-5.6 family checks
 * in this module.
 *
 * @param model A language model, a chat endpoint, or a raw family string.
 * @returns `true` when `isGpt56SolOrTerra` or `isGpt56Luna` accepts `model`.
 */
export function isGpt56(model: LanguageModelChat | IChatEndpoint | string) {
	// Evaluation order matches the original short-circuit: Sol/Terra first, Luna second.
	if (isGpt56SolOrTerra(model)) {
		return true;
	}
	return isGpt56Luna(model);
}
/**
 * Reports whether `model` belongs to the `ember-alpha` family.
 *
 * The comparison is an exact, case-sensitive string match; prefixes and
 * suffixed variants are not accepted.
 * NOTE(review): the function name suggests `ember-alpha` maps to the GPT-5.6
 * "Sol"/"Terra" variants — that mapping is not visible here, confirm.
 *
 * @param model A language model or chat endpoint (its `family` is inspected),
 * or a raw family string.
 * @returns `true` when the family is exactly `'ember-alpha'`.
 */
export function isGpt56SolOrTerra(model: LanguageModelChat | IChatEndpoint | string) {
	if (typeof model === 'string') {
		return model === 'ember-alpha';
	}
	return model.family === 'ember-alpha';
}
/**
 * Reports whether `model` belongs to the `opal-alpha` family.
 *
 * The comparison is an exact, case-sensitive string match; prefixes and
 * suffixed variants are not accepted.
 * NOTE(review): the function name suggests `opal-alpha` maps to the GPT-5.6
 * "Luna" variant — that mapping is not visible here, confirm.
 *
 * @param model A language model or chat endpoint (its `family` is inspected),
 * or a raw family string.
 * @returns `true` when the family is exactly `'opal-alpha'`.
 */
export function isGpt56Luna(model: LanguageModelChat | IChatEndpoint | string) {
	if (typeof model === 'string') {
		return model === 'opal-alpha';
	}
	return model.family === 'opal-alpha';
}
export function isGpt53Codex(model: LanguageModelChat | IChatEndpoint | string) {
@@ -253,7 +258,7 @@ export function modelSupportsApplyPatch(model: LanguageModelChat | IChatEndpoint
|| isGpt52Family(model.family)
|| isGpt54(model)
|| isHiddenModelB(model)
|| isHiddenModelM(model);
|| isGpt56(model);
}
/**
@@ -267,7 +272,7 @@ export function modelPrefersJsonNotebookRepresentation(model: LanguageModelChat
|| isGpt52Family(model.family)
|| isGpt54(model)
|| isHiddenModelB(model)
|| isHiddenModelM(model);
|| isGpt56(model);
}
/**
@@ -318,7 +323,7 @@ export function modelCanUseImageURL(model: LanguageModelChat | IChatEndpoint): b
* The model supports native PDF document processing via document content parts.
*/
export function modelSupportsPDFDocuments(model: LanguageModelChat | IChatEndpoint): boolean {
return isAnthropicFamily(model) || isGpt5PlusFamily(model) || isHiddenModelM(model);
return isAnthropicFamily(model) || isGpt5PlusFamily(model) || isGpt56(model);
}
/**
@@ -442,10 +447,11 @@ export function getVerbosityForModelSync(model: IChatEndpoint): 'low' | 'medium'
export function modelSupportsToolSearch(model: LanguageModelChat | IChatEndpoint | string): boolean {
const id = typeof model === 'string' ? model : getModelId(model);
const family = typeof model === 'string' ? model : model.family;
const isGpt56Model: boolean = isGpt56(model);
const matches = (s: string) => {
const n = s.toLowerCase().replace(/\./g, '-');
// OpenAI models with client-side tool search.
if (n === 'gpt-5-4' || n === 'gpt-5-5') {
if (n === 'gpt-5-4' || n === 'gpt-5-5' || isGpt56Model) {
return true;
}
if (!n.startsWith('claude')) {
@@ -468,7 +474,7 @@ export function modelSupportsToolSearch(model: LanguageModelChat | IChatEndpoint
n === 'claude-opus-4' || n.startsWith('claude-opus-4-1') || n.startsWith('claude-opus-4-2');
return !isPre45;
};
return matches(id) || matches(family) || isHiddenModelM(family);
return matches(id) || matches(family);
}
/**
@@ -27,7 +27,7 @@ import { IChatWebSocketManager } from '../../networking/node/chatWebSocketManage
import { IExperimentationService } from '../../telemetry/common/nullExperimentationService';
import { ITelemetryService } from '../../telemetry/common/telemetry';
import { TelemetryData } from '../../telemetry/common/telemetryData';
import { getVerbosityForModelSync, isGpt54, isGpt55, isHiddenModelM } from '../common/chatModelCapabilities';
import { getVerbosityForModelSync } from '../common/chatModelCapabilities';
import { rawPartAsCompactionData } from '../common/compactionDataContainer';
import { rawPartAsPhaseData } from '../common/phaseDataContainer';
import { getIndexOfStatefulMarker, getStatefulMarkerAndIndex } from '../common/statefulMarkerContainer';
@@ -160,13 +160,10 @@ export function createResponsesRequestBody(accessor: ServicesAccessor, options:
? (effortFromSetting || options.modelCapabilities?.reasoningEffort || 'medium')
: undefined;
const summary: string | undefined = undefined;
const persistentCoTEnabled = configService.getExperimentBasedConfig(ConfigKey.ResponsesApiPersistentCoTEnabled, expService)
&& (isGpt54(endpoint) || isGpt55(endpoint) || isHiddenModelM(endpoint));
if (effort || summary || persistentCoTEnabled) {
if (effort || summary) {
body.reasoning = {
...(effort ? { effort } : {}),
...(summary ? { summary } : {}),
...(persistentCoTEnabled ? { context: 'all_turns' } : {})
...(summary ? { summary } : {})
};
}
@@ -9,7 +9,6 @@ import { describe, expect, it } from 'vitest';
import { TokenizerType } from '../../../../util/common/tokenizer';
import { IInstantiationService } from '../../../../util/vs/platform/instantiation/common/instantiation';
import { ChatLocation } from '../../../chat/common/commonTypes';
import { ConfigKey, IConfigurationService } from '../../../configuration/common/configurationService';
import { ILogService } from '../../../log/common/logService';
import { isOpenAIContextManagementResponse } from '../../../networking/common/fetch';
import { IChatEndpoint, ICreateEndpointBodyOptions } from '../../../networking/common/networking';
@@ -369,59 +368,6 @@ describe('responseApiInputToRawMessagesForLogging', () => {
});
describe('createResponsesRequestBody', () => {
it('enables persistent CoT on initial requests for hidden model M when the experiment is enabled', () => {
const services = createPlatformServices();
const accessor = services.createTestingAccessor();
const instantiationService = accessor.get(IInstantiationService);
accessor.get(IConfigurationService).setConfig(ConfigKey.ResponsesApiPersistentCoTEnabled, true);
const endpoint = { ...testEndpoint, family: 'ember-alpha', supportsReasoningEffort: ['low', 'medium', 'high'] };
const body = instantiationService.invokeFunction(servicesAccessor => createResponsesRequestBody(servicesAccessor, createRequestOptions([], false), endpoint.model, endpoint));
expect(body.reasoning).toEqual({ effort: 'medium', context: 'all_turns' });
accessor.dispose();
services.dispose();
});
it('does not enable persistent CoT when the experiment is disabled or the family is unsupported', () => {
const services = createPlatformServices();
const accessor = services.createTestingAccessor();
const instantiationService = accessor.get(IInstantiationService);
const emberEndpoint = { ...testEndpoint, family: 'ember-alpha' };
const unsupportedEndpoint = { ...testEndpoint, model: 'ember-alpha', family: 'other-family' };
const disabledBody = instantiationService.invokeFunction(servicesAccessor => createResponsesRequestBody(servicesAccessor, createRequestOptions([], false), emberEndpoint.model, emberEndpoint));
accessor.get(IConfigurationService).setConfig(ConfigKey.ResponsesApiPersistentCoTEnabled, true);
const unsupportedBody = instantiationService.invokeFunction(servicesAccessor => createResponsesRequestBody(servicesAccessor, createRequestOptions([], false), unsupportedEndpoint.model, unsupportedEndpoint));
expect(disabledBody.reasoning?.context).toBeUndefined();
expect(unsupportedBody.reasoning?.context).toBeUndefined();
accessor.dispose();
services.dispose();
});
it('keeps persistent CoT enabled when continuing from a previous response', () => {
const services = createPlatformServices();
const accessor = services.createTestingAccessor();
const instantiationService = accessor.get(IInstantiationService);
accessor.get(IConfigurationService).setConfig(ConfigKey.ResponsesApiPersistentCoTEnabled, true);
const endpoint = { ...testEndpoint, family: 'ember-alpha' };
const messages: Raw.ChatMessage[] = [
createStatefulMarkerMessage(endpoint.model, 'resp-prev'),
{ role: Raw.ChatRole.User, content: [{ type: Raw.ChatCompletionContentPartKind.Text, text: 'continue' }] },
];
const body = instantiationService.invokeFunction(servicesAccessor => createResponsesRequestBody(servicesAccessor, createRequestOptions(messages, false), endpoint.model, endpoint));
expect(body.previous_response_id).toBe('resp-prev');
expect(body.reasoning?.context).toBe('all_turns');
accessor.dispose();
services.dispose();
});
it('extracts compaction threshold from request body context management', () => {
expect(getResponsesApiCompactionThresholdFromBody({
context_management: [{
@@ -75,7 +75,7 @@ export interface IEndpointBody {
prediction?: Prediction;
messages?: any[];
n?: number;
reasoning?: { effort?: string; summary?: string; context?: 'current_turn' | 'all_turns' };
reasoning?: { effort?: string; summary?: string };
tool_choice?: OptionalChatRequestParams['tool_choice'] | { type: 'function'; name: string } | string;
top_logprobs?: number;
intent?: boolean;