From f91303fbdeff24de027fbc673bf103c5347fc26c Mon Sep 17 00:00:00 2001 From: Bhavya U Date: Wed, 13 May 2026 00:07:15 -0700 Subject: [PATCH] Freeze customizations-index per conversation to stabilize system prompt cache (#316191) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Freeze customizations-index per conversation to stabilize system prompt cache Adds the experimental `github.copilot.chat.freezeCustomizationsIndex` setting (advanced/experimental/onExp, default false). When on, the bundled `<instructions>`/`<skills>`/`<agents>` listing in the system prompt is snapshotted on the first turn and reused on every subsequent turn, preventing per-turn churn (mode swap rewriting the active subagent in `<agents>`, async experimentation flipping a when-gated skill in or out) from invalidating the system prompt cache. When the live listing drifts from the snapshot, the updated set is appended to the latest user message inside AgentUserMessage's context tag — kept inside the captured RenderedUserMessageMetadata so the historical user message replays byte-identically on later turns. Drift also fires with an empty value when the live variable disappears, so the model gets a signal that previously-listed entries are gone. 
Fixes #315408 Fixes #316182 * Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --------- Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- extensions/copilot/package.json | 10 ++ extensions/copilot/package.nls.json | 1 + .../extension/prompt/common/conversation.ts | 23 ++- .../prompts/node/agent/agentPrompt.tsx | 136 +++++++++++++++++- .../agent/summarizedConversationHistory.tsx | 9 +- .../prompts/node/panel/customInstructions.tsx | 38 ++++- .../common/configurationService.ts | 3 + 7 files changed, 207 insertions(+), 13 deletions(-) diff --git a/extensions/copilot/package.json b/extensions/copilot/package.json index 329d6124a89..a12de99ce35 100644 --- a/extensions/copilot/package.json +++ b/extensions/copilot/package.json @@ -4113,6 +4113,16 @@ ], "description": "%github.copilot.config.anthropic.promptCaching.extendedTtl%" }, + "github.copilot.chat.freezeCustomizationsIndex": { + "type": "boolean", + "default": false, + "tags": [ + "advanced", + "experimental", + "onExp" + ], + "description": "%github.copilot.config.freezeCustomizationsIndex%" + }, "github.copilot.chat.installExtensionSkill.enabled": { "type": "boolean", "default": false, diff --git a/extensions/copilot/package.nls.json b/extensions/copilot/package.nls.json index c1dd11a1807..bb207a64817 100644 --- a/extensions/copilot/package.nls.json +++ b/extensions/copilot/package.nls.json @@ -340,6 +340,7 @@ "github.copilot.config.useMessagesApi": "Use the Messages API instead of the Chat Completions API when supported.", "github.copilot.config.anthropic.contextEditing.mode": "Select the context editing mode for Anthropic models. 
Automatically manages conversation context as it grows, helping optimize costs and stay within context window limits.\n\n- `off`: Context editing is disabled.\n- `clear-thinking`: Clears thinking blocks while preserving tool uses.\n- `clear-tooluse`: Clears tool uses while preserving thinking blocks.\n- `clear-both`: Clears both thinking blocks and tool uses.\n\n**Note**: This is an experimental feature. Context editing may cause additional cache rewrites. Enable with caution.", "github.copilot.config.anthropic.promptCaching.extendedTtl": "Use the extended (1 hour) prompt cache TTL on tools and system blocks for the Anthropic Messages API. Only applied to 1M context Claude variants; other models keep the default 5 minute TTL even when this setting is enabled.\n\n**Note**: This is an experimental feature. Only the main agent conversation is eligible — inline chat, terminal chat, notebook chat, and subagent requests are excluded.", + "github.copilot.config.freezeCustomizationsIndex": "Freeze the bundled `<instructions>`, `<skills>`, and `<agents>` listing in the system prompt at the first turn of a conversation and reuse it on every subsequent turn. Prevents per-turn churn (e.g. the active mode swapping which subagent entry is listed, or async experimentation flipping a skill in or out) from invalidating the prompt cache. When the listing changes mid-conversation, the updated set is appended to the latest user message so the model still sees instructions, skills, or agents that became available or were removed.\n\n**Note**: This is an experimental feature.", "github.copilot.config.useResponsesApi": "Use the Responses API instead of the Chat Completions API when supported. 
Enables reasoning and reasoning summaries.\n\n**Note**: This is an experimental feature that is not yet activated for all users.\n\n**Important**: URL API path resolution for custom OpenAI-compatible and Azure models is independent of this setting and fully determined by `url` property of `#github.copilot.chat.customOAIModels#` or `#github.copilot.chat.azureModels#` respectively.", "github.copilot.config.responsesApiReasoningSummary": "Sets the reasoning summary style used for the Responses API. Requires `#github.copilot.chat.useResponsesApi#`.", "github.copilot.config.responsesApiContextManagement.enabled": "Enables context management for the Responses API. Requires `#github.copilot.chat.useResponsesApi#`.", diff --git a/extensions/copilot/src/extension/prompt/common/conversation.ts b/extensions/copilot/src/extension/prompt/common/conversation.ts index 2bb640ef003..b4ec9eea178 100644 --- a/extensions/copilot/src/extension/prompt/common/conversation.ts +++ b/extensions/copilot/src/extension/prompt/common/conversation.ts @@ -4,7 +4,7 @@ *--------------------------------------------------------------------------------------------*/ import { PromptReference, Raw } from '@vscode/prompt-tsx'; -import type { ChatRequest, ChatRequestEditedFileEvent, ChatResponseStream, ChatResult, LanguageModelToolResult } from 'vscode'; +import type { ChatLanguageModelToolReference, ChatRequest, ChatRequestEditedFileEvent, ChatResponseStream, ChatResult, LanguageModelToolResult } from 'vscode'; import { FilterReason } from '../../../platform/networking/common/openai'; import { IWorkspaceService } from '../../../platform/workspace/common/workspaceService'; import { isLocation, toLocation } from '../../../util/common/types'; @@ -445,6 +445,27 @@ export class GlobalContextMessageMetadata { ) { } } +/** + * Captures the customizations-index variable value (the bundled + * ``/``/`` text) as it appeared on the first + * turn of the conversation. 
Reused on subsequent turns so per-turn churn in + * any of those listings (e.g. the active mode swapping which subagent entry + * appears in ``) does not invalidate the system prompt cache. The + * cacheKey invalidates the snapshot when something genuinely changed (e.g. + * the user opened a different workspace mid-conversation). + * + * The {@link toolReferences} carry byte offsets into {@link value} and must + * be captured together — current-turn references built against current-turn + * text would mis-slice the frozen value. + */ +export class CustomizationsIndexMetadata { + constructor( + readonly value: string, + readonly toolReferences: readonly ChatLanguageModelToolReference[] | undefined, + readonly cacheKey: string + ) { } +} + /** * Captures `prompt_tokens` and `completion_tokens` from the most recent * successful fetch on a turn. All providers return these values in their diff --git a/extensions/copilot/src/extension/prompts/node/agent/agentPrompt.tsx b/extensions/copilot/src/extension/prompts/node/agent/agentPrompt.tsx index 6a1223abf34..00019fc3a2e 100644 --- a/extensions/copilot/src/extension/prompts/node/agent/agentPrompt.tsx +++ b/extensions/copilot/src/extension/prompts/node/agent/agentPrompt.tsx @@ -4,7 +4,7 @@ *--------------------------------------------------------------------------------------------*/ import { BasePromptElementProps, Chunk, Document, PromptElement, PromptPiece, PromptPieceChild, PromptSizing, Raw, SystemMessage, TokenLimit, UserMessage } from '@vscode/prompt-tsx'; -import type { ChatRequestEditedFileEvent, LanguageModelToolInformation, NotebookEditor, TaskDefinition, TextEditor } from 'vscode'; +import type { ChatLanguageModelToolReference, ChatRequestEditedFileEvent, LanguageModelToolInformation, NotebookEditor, TaskDefinition, TextEditor } from 'vscode'; import { sessionResourceToId } from '../../../../platform/chat/common/chatDebugFileLoggerService'; import { ChatLocation } from 
'../../../../platform/chat/common/commonTypes'; import { ConfigKey, IConfigurationService } from '../../../../platform/configuration/common/configurationService'; @@ -27,7 +27,7 @@ import { IInstantiationService } from '../../../../util/vs/platform/instantiatio import { ChatRequestEditedFileEventKind, Position, Range } from '../../../../vscodeTypes'; import { GenericBasePromptElementProps } from '../../../context/node/resolvers/genericPanelIntentInvocation'; import { ChatVariablesCollection, extractDebugTargetSessionIds, isCustomizationsIndex } from '../../../prompt/common/chatVariablesCollection'; -import { getGlobalContextCacheKey, GlobalContextMessageMetadata, RenderedUserMessageMetadata, Turn } from '../../../prompt/common/conversation'; +import { CustomizationsIndexMetadata, getGlobalContextCacheKey, GlobalContextMessageMetadata, RenderedUserMessageMetadata, Turn } from '../../../prompt/common/conversation'; import { InternalToolReference } from '../../../prompt/common/intents'; import { IPromptVariablesService } from '../../../prompt/node/promptVariablesService'; import { ToolName } from '../../../tools/common/toolNames'; @@ -131,9 +131,10 @@ export class AgentPrompt extends PromptElement { const sessionId = sessionResource ? sessionResourceToId(sessionResource) : undefined; const debugTargetSessionIds = extractDebugTargetSessionIds([...this.props.promptContext.chatVariables].map(v => v.reference)); const templateVariablesContext = this.promptVariablesService.buildTemplateVariablesContext(sessionId, debugTargetSessionIds); + const customizationsSnapshot = this.getOrFreezeCustomizationsIndex(); const baseInstructions = <> {!omitBaseAgentInstructions && baseAgentInstructions} - {await this.getAgentCustomInstructions()} + {await this.getAgentCustomInstructions(customizationsSnapshot?.frozen)} {isAutopilot && When you have fully completed the task, call the task_complete tool to signal that you are done.
IMPORTANT: Before calling task_complete, you MUST provide a brief text summary of what was accomplished in your message. The task is not complete until both the summary and the task_complete call are present. @@ -166,13 +167,14 @@ export class AgentPrompt extends PromptElement { userQueryTagName={userQueryTagName} ReminderInstructionsClass={ReminderInstructionsClass} ToolReferencesHintClass={ToolReferencesHintClass} + customizationsIndexUpdate={customizationsSnapshot?.drift} /> ; } else { return <> {baseInstructions} - + ; } @@ -197,13 +199,15 @@ export class AgentPrompt extends PromptElement { />; } - private async getAgentCustomInstructions() { + private async getAgentCustomInstructions(frozenCustomizationsIndex?: { value: string; toolReferences: readonly ChatLanguageModelToolReference[] | undefined }) { const putCustomInstructionsInSystemMessage = this.configurationService.getConfig(ConfigKey.CustomInstructionsInSystemMessage); const customInstructionsBodyParts: PromptPiece[] = []; customInstructionsBodyParts.push( @@ -225,6 +229,67 @@ export class AgentPrompt extends PromptElement { {customInstructionsBodyParts}; } + /** + * When the experimental `FreezeCustomizationsIndex` setting is enabled, + * snapshot the customizations-index variable on the first turn of the + * conversation and reuse it for every subsequent turn. Stops per-turn + * churn in the bundled ``/``/`` text (e.g. + * the active mode swapping which subagent entry is listed in ``) + * from invalidating the system prompt cache. + * + * Returns: + * - `frozen`: the value (and matching tool-reference offsets) to substitute + * in the system prompt. Always present when the setting is enabled and a + * variable is available. + * - `drift`: the live current-turn value (and offsets) when it differs from + * `frozen`. Rendered in the latest user message so the model sees the + * up-to-date listing without busting the system prompt cache. 
Also + * emitted as an empty value when the live variable disappears, so the + * model gets a signal that previously listed entries are no longer + * available. + * + * Returns `undefined` overall if no override should apply (setting off, + * no first turn available, or no snapshot yet and the variable is absent + * on this turn). + */ + private getOrFreezeCustomizationsIndex(): { + frozen: { value: string; toolReferences: readonly ChatLanguageModelToolReference[] | undefined }; + drift?: { value: string; toolReferences: readonly ChatLanguageModelToolReference[] | undefined }; + } | undefined { + const enabled = this.configurationService.getExperimentBasedConfig(ConfigKey.Advanced.FreezeCustomizationsIndex, this.experimentationService); + if (!enabled) { + return undefined; + } + const firstTurn = this.props.promptContext.conversation?.turns.at(0); + if (!firstTurn) { + return undefined; + } + const variable = this.props.promptContext.chatVariables.find(isCustomizationsIndex); + const currentValue = variable && typeof variable.value === 'string' ? variable.value : undefined; + const currentToolReferences = variable?.reference.toolReferences; + + const currentCacheKey = this.instantiationService.invokeFunction(getGlobalContextCacheKey); + const existing = firstTurn.getMetadata(CustomizationsIndexMetadata); + if (existing && existing.cacheKey === currentCacheKey) { + const frozen = { value: existing.value, toolReferences: existing.toolReferences }; + // Surface drift in either direction: a different live value, or the + // live variable disappearing entirely (treated as an empty listing). + // Without the second case the model is left looking at the stale + // frozen ``/``/`` block with no signal + // that entries have been removed. + const effectiveCurrent = currentValue ?? 
''; + if (effectiveCurrent !== existing.value) { + return { frozen, drift: { value: effectiveCurrent, toolReferences: currentToolReferences } }; + } + return { frozen }; + } + if (currentValue === undefined) { + return undefined; + } + firstTurn.setMetadata(new CustomizationsIndexMetadata(currentValue, currentToolReferences, currentCacheKey)); + return { frozen: { value: currentValue, toolReferences: currentToolReferences } }; + } + private async getOrCreateGlobalAgentContext(endpoint: IChatEndpoint): Promise { const globalContext = await this.getOrCreateGlobalAgentContextContent(endpoint); const isNewChat = this.props.promptContext.history?.length === 0; @@ -325,6 +390,25 @@ export interface AgentUserMessageProps extends BasePromptElementProps, AgentUser readonly additionalHookContext?: string; /** When true, this request was system-initiated (e.g. terminal completion notification) and should skip context/wrapping. */ readonly isSystemInitiated?: boolean; + /** + * Live customizations-index text rendered into the latest user message + * when it has drifted from the frozen snapshot in the system prompt. + * Lets the model see mid-conversation updates (new skill, mode swap, etc.) + * without invalidating the system prompt cache. + * + * Rendered inside this message's `` (rather than as a + * sibling `UserMessage`) so it becomes part of the captured + * `RenderedUserMessageMetadata`. Emitting it as a separate UserMessage + * would cause `result.messages.at(-1)` in `agentIntent.runOne` to point + * at the drift block instead of the user query — the metadata would then + * store just the drift block and historical replays on later turns would + * lose the actual user query, busting cross-turn cache continuity. + * + * Only set when the experimental `FreezeCustomizationsIndex` setting is + * enabled and the current value differs from the snapshot captured on + * the first turn. 
+ */ + readonly customizationsIndexUpdate?: { value: string; toolReferences: readonly ChatLanguageModelToolReference[] | undefined }; } export function getUserMessagePropsFromTurn(turn: Turn, endpoint: IChatEndpoint, customizations?: AgentUserMessageCustomizations): AgentUserMessageProps { @@ -441,6 +525,7 @@ export class AgentUserMessage extends PromptElement { {hasTerminalTool && } {hasTodoTool && } {this.props.additionalHookContext && } + {this.props.customizationsIndexUpdate && } @@ -536,6 +621,47 @@ class AdditionalHookContextPrompt extends PromptElement` alongside `EditedFileEvents`, + * `TerminalStatePromptElement`, etc., so it becomes part of the same + * `UserMessage` and is captured into `RenderedUserMessageMetadata` together + * with the user query. Replays verbatim on subsequent turns as the + * historical user message, preserving cross-turn cache continuity. Emitting + * it as a sibling `UserMessage` would make `result.messages.at(-1)` point at + * the drift block in `agentIntent.runOne`, so the metadata would store just + * the drift block and historical replays on later turns would lose the + * actual user query. + * + * Used only when `FreezeCustomizationsIndex` is on and the live index + * differs from the snapshot captured on the first turn. + */ +class CustomizationsIndexUpdate extends PromptElement { + constructor( + props: CustomizationsIndexUpdateProps, + @IPromptVariablesService private readonly promptVariablesService: IPromptVariablesService, + ) { + super(props); + } + + async render() { + let value = this.props.update.value; + const toolReferences = this.props.update.toolReferences; + if (toolReferences?.length) { + value = await this.promptVariablesService.resolveToolReferencesInPrompt(value, toolReferences); + } + return + The available instructions, skills, and agents have changed since this conversation started. The listings below supersede the ones in the system prompt.
+ {value} +
; + } +} + interface SkillAdherenceReminderProps extends BasePromptElementProps { readonly chatVariables: ChatVariablesCollection; } diff --git a/extensions/copilot/src/extension/prompts/node/agent/summarizedConversationHistory.tsx b/extensions/copilot/src/extension/prompts/node/agent/summarizedConversationHistory.tsx index aaabcb6a965..a368814c9d6 100644 --- a/extensions/copilot/src/extension/prompts/node/agent/summarizedConversationHistory.tsx +++ b/extensions/copilot/src/extension/prompts/node/agent/summarizedConversationHistory.tsx @@ -7,7 +7,7 @@ import * as l10n from '@vscode/l10n'; import { BasePromptElementProps, PrioritizedList, PromptElement, PromptMetadata, PromptSizing, Raw, SystemMessage, UserMessage } from '@vscode/prompt-tsx'; import { BudgetExceededError } from '@vscode/prompt-tsx/dist/base/materialized'; import { ChatMessage } from '@vscode/prompt-tsx/dist/base/output/rawTypes'; -import type { ChatResponsePart, ChatResultPromptTokenDetail, LanguageModelToolInformation, NotebookDocument, Progress } from 'vscode'; +import type { ChatLanguageModelToolReference, ChatResponsePart, ChatResultPromptTokenDetail, LanguageModelToolInformation, NotebookDocument, Progress } from 'vscode'; import { IChatHookService, PreCompactHookInput } from '../../../../platform/chat/common/chatHookService'; import { ChatFetchResponseType, ChatLocation, ChatResponse, FetchSuccess } from '../../../../platform/chat/common/commonTypes'; import { getTextPart } from '../../../../platform/chat/common/globalStringUtils'; @@ -263,7 +263,7 @@ class ConversationHistory extends PromptElement { userQueryTagName: this.props.userQueryTagName, ReminderInstructionsClass: this.props.ReminderInstructionsClass, ToolReferencesHintClass: this.props.ToolReferencesHintClass, - })} />); + })} customizationsIndexUpdate={this.props.customizationsIndexUpdate} />); } // We may have a summary from earlier in the conversation, but skip history if we have a new summary @@ -408,6 +408,11 @@ export interface 
SummarizedAgentHistoryProps extends BasePromptElementProps, Age readonly summarizationInstructions?: string; /** Skip Full mode and go straight to Simple mode for foreground budget-exceeded recovery. */ readonly forceSimpleSummary?: boolean; + /** + * Forwarded to the latest user message when the customizations-index has + * drifted from its frozen snapshot. See {@link AgentUserMessageProps.customizationsIndexUpdate}. + */ + readonly customizationsIndexUpdate?: { value: string; toolReferences: readonly ChatLanguageModelToolReference[] | undefined }; } /** diff --git a/extensions/copilot/src/extension/prompts/node/panel/customInstructions.tsx b/extensions/copilot/src/extension/prompts/node/panel/customInstructions.tsx index dc45b214c33..8d7179db621 100644 --- a/extensions/copilot/src/extension/prompts/node/panel/customInstructions.tsx +++ b/extensions/copilot/src/extension/prompts/node/panel/customInstructions.tsx @@ -20,6 +20,21 @@ import { Tag } from '../base/tag'; export interface CustomInstructionsProps extends BasePromptElementProps { readonly chatVariables: ChatVariablesCollection | undefined; + /** + * When provided, this value is used in place of the live customizations-index + * variable's value (the bundled ``/``/`` text). + * The agent prompt sets this to freeze the index at the first turn of a + * conversation so per-turn churn doesn't invalidate the system prompt + * cache. + * + * Must be paired with {@link customizationsIndexToolReferencesOverride} + * because tool references carry byte offsets into this text — using the + * current turn's offsets against the frozen text would mis-slice. + */ + readonly customizationsIndexOverride?: string; + /** Tool references paired with {@link customizationsIndexOverride}. 
*/ + readonly customizationsIndexToolReferencesOverride?: readonly ChatLanguageModelToolReference[]; + readonly languageId: string | undefined; /** * @default true @@ -71,14 +86,27 @@ export class CustomInstructions extends PromptElement { if (includeCodeGenerationInstructions !== false) { const hasSeen = new ResourceSet(); const hasSeenContent = new Set(); + + const liveIndexVariable = this.props.chatVariables?.find(isCustomizationsIndex); + const overrideActive = this.props.customizationsIndexOverride !== undefined; + const indexValue = overrideActive + ? this.props.customizationsIndexOverride! + : (liveIndexVariable && typeof liveIndexVariable.value === 'string' ? liveIndexVariable.value : undefined); + const indexToolReferences = overrideActive + ? this.props.customizationsIndexToolReferencesOverride + : liveIndexVariable?.reference.toolReferences; + if (indexValue !== undefined) { + let value = indexValue; + if (indexToolReferences?.length) { + value = await this.promptVariablesService.resolveToolReferencesInPrompt(value, indexToolReferences); + } + chunks.push({value}); + } + if (this.props.chatVariables) { for (const variable of this.props.chatVariables) { if (isCustomizationsIndex(variable)) { - let value = variable.value; - if (variable.reference.toolReferences?.length) { - value = await this.promptVariablesService.resolveToolReferencesInPrompt(value, variable.reference.toolReferences); - } - chunks.push({value}); + continue; } else if (isInstructionFile(variable)) { const value = variable.value; if (!hasSeen.has(value)) { diff --git a/extensions/copilot/src/platform/configuration/common/configurationService.ts b/extensions/copilot/src/platform/configuration/common/configurationService.ts index 319fbae28d3..57ea114f940 100644 --- a/extensions/copilot/src/platform/configuration/common/configurationService.ts +++ b/extensions/copilot/src/platform/configuration/common/configurationService.ts @@ -730,6 +730,9 @@ export namespace ConfigKey { /** Enable extended 
(1 hour) prompt cache TTL on tools and system blocks for the Anthropic Messages API. Only applied to 1M context Claude variants. */ export const AnthropicExtendedCacheTtl = defineSetting('chat.anthropic.promptCaching.extendedTtl', ConfigType.ExperimentBased, false); + /** Freeze the customizations-index variable (the `<instructions>`/`<skills>`/`<agents>` block) at the first turn of a conversation and reuse it on subsequent turns. Prevents the system prompt cache from being invalidated by per-turn churn — e.g. the active mode swapping which subagent entry appears in `<agents>`, or async experimentation flipping a `when`-gated skill. */ + export const FreezeCustomizationsIndex = defineSetting('chat.freezeCustomizationsIndex', ConfigType.ExperimentBased, false); + export const InlineEditsXtabProviderModelConfiguration = (() => { const oldKey = 'chat.advanced.inlineEdits.xtabProvider.modelConfiguration'; const newKey = 'chat.inlineEdits.xtabProvider.modelConfiguration';