diff --git a/extensions/copilot/package.json b/extensions/copilot/package.json index e42b1658ace..7d07f0bf94f 100644 --- a/extensions/copilot/package.json +++ b/extensions/copilot/package.json @@ -3224,15 +3224,6 @@ "onExp" ] }, - "github.copilot.chat.backgroundCompaction": { - "type": "boolean", - "default": false, - "markdownDescription": "%github.copilot.config.backgroundCompaction%", - "tags": [ - "preview", - "onExp" - ] - }, "github.copilot.chat.anthropic.toolSearchTool.enabled": { "type": "boolean", "default": true, @@ -3253,15 +3244,6 @@ "preview", "onExp" ] - }, - "github.copilot.chat.conversationTranscriptLookup.enabled": { - "type": "boolean", - "default": false, - "description": "%github.copilot.config.conversationTranscriptLookup.enabled%", - "tags": [ - "preview", - "onExp" - ] } } }, diff --git a/extensions/copilot/package.nls.json b/extensions/copilot/package.nls.json index 034d9097c68..7b421582ee4 100644 --- a/extensions/copilot/package.nls.json +++ b/extensions/copilot/package.nls.json @@ -263,7 +263,6 @@ "github.copilot.config.editsNewNotebook.enabled": "Whether to enable the new notebook tool in Copilot Edits.", "github.copilot.config.notebook.inlineEditAgent.enabled": "Enable agent-like behavior from the notebook inline chat widget.", "github.copilot.config.summarizeAgentConversationHistory.enabled": "Whether to auto-compact agent conversation history once the context window is filled.", - "github.copilot.config.conversationTranscriptLookup.enabled": "When enabled, after conversation history is summarized the model is informed it can look up the full conversation transcript via read_file.", "github.copilot.tools.createNewWorkspace.name": "Create New Workspace", "github.copilot.tools.openEmptyFolder.name": "Open an empty folder as VS Code workspace", "github.copilot.tools.getProjectSetupInfo.name": "Get Project Setup Info", @@ -391,9 +390,7 @@ "github.copilot.config.instantApply.shortContextLimit": "Token limit for short context instant apply.", "github.copilot.config.summarizeAgentConversationHistoryThreshold": "Threshold for compacting agent conversation history.", "github.copilot.config.agentHistorySummarizationMode": "Mode for agent history summarization.", - "github.copilot.config.backgroundCompaction": "Enable background compaction of conversation history.", "github.copilot.config.agentHistorySummarizationInline": "Summarize conversation inline within the agent loop instead of a separate LLM call, maximizing prompt cache hits.", - "github.copilot.config.useResponsesApiTruncation": "Use Responses API for truncation.", "github.copilot.config.enableReadFileV2": "Enable version 2 of the read file tool.", "github.copilot.config.enableAskAgent": "Enable the Ask agent for answering questions.", diff --git a/extensions/copilot/src/extension/intents/node/agentIntent.ts b/extensions/copilot/src/extension/intents/node/agentIntent.ts index 80bc3dd9996..a5272f7b092 100644 --- a/extensions/copilot/src/extension/intents/node/agentIntent.ts +++ b/extensions/copilot/src/extension/intents/node/agentIntent.ts @@ -8,7 +8,7 @@ import { Raw, RenderPromptResult } from '@vscode/prompt-tsx'; import { BudgetExceededError } from '@vscode/prompt-tsx/dist/base/materialized'; import type * as vscode from 'vscode'; import { IChatSessionService } from '../../../platform/chat/common/chatSessionService'; -import { ChatLocation, ChatResponse } from '../../../platform/chat/common/commonTypes'; +import { ChatFetchResponseType, ChatLocation, ChatResponse } from '../../../platform/chat/common/commonTypes'; import { ConfigKey, IConfigurationService } from '../../../platform/configuration/common/configurationService'; import { isAnthropicFamily, isGptFamily, modelCanUseApplyPatchExclusively, modelCanUseReplaceStringExclusively, modelSupportsApplyPatch, modelSupportsMultiReplaceString, modelSupportsReplaceString, modelSupportsSimplifiedApplyPatchInstructions } from '../../../platform/endpoint/common/chatModelCapabilities'; import { IEndpointProvider } from '../../../platform/endpoint/common/endpointProvider'; @@ -16,7 +16,7 @@ import { IAutomodeService } from '../../../platform/endpoint/node/automodeServic import { IEnvService } from '../../../platform/env/common/envService'; import { ILogService } from '../../../platform/log/common/logService'; import { IEditLogService } from '../../../platform/multiFileEdit/common/editLogService'; -import { CUSTOM_TOOL_SEARCH_NAME, isAnthropicCustomToolSearchEnabled, isAnthropicToolSearchEnabled } from '../../../platform/networking/common/anthropic'; +import { CUSTOM_TOOL_SEARCH_NAME, isAnthropicCustomToolSearchEnabled, isAnthropicContextEditingEnabled, isAnthropicToolSearchEnabled } from '../../../platform/networking/common/anthropic'; import { IChatEndpoint } from '../../../platform/networking/common/networking'; import { modelsWithoutResponsesContextManagement } from '../../../platform/networking/common/openai'; import { INotebookService } from '../../../platform/notebook/common/notebookService'; @@ -47,13 +47,14 @@ import { IBuildPromptResult, IIntent, IIntentInvocation } from '../../prompt/nod import { AgentPrompt, AgentPromptProps } from '../../prompts/node/agent/agentPrompt'; import { BackgroundSummarizationState, BackgroundSummarizer, IBackgroundSummarizationResult } from '../../prompts/node/agent/backgroundSummarizer'; import { AgentPromptCustomizations, PromptRegistry } from '../../prompts/node/agent/promptRegistry'; -import { SummarizedConversationHistory, SummarizedConversationHistoryMetadata, SummarizedConversationHistoryPropsBuilder } from '../../prompts/node/agent/summarizedConversationHistory'; -import { PromptRenderer } from '../../prompts/node/base/promptRenderer'; +import { extractInlineSummary, InlineSummarizationUserMessage, SummarizedConversationHistory, SummarizedConversationHistoryMetadata, SummarizedConversationHistoryPropsBuilder } from '../../prompts/node/agent/summarizedConversationHistory'; +import { PromptRenderer, renderPromptElement } from '../../prompts/node/base/promptRenderer'; import { ICodeMapperService } from '../../prompts/node/codeMapper/codeMapperService'; import { EditCodePrompt2 } from '../../prompts/node/panel/editCodePrompt2'; import { NotebookInlinePrompt } from '../../prompts/node/panel/notebookInlinePrompt'; import { ToolResultMetadata } from '../../prompts/node/panel/toolCalling'; import { IEditToolLearningService } from '../../tools/common/editToolLearningService'; +import { normalizeToolSchema } from '../../tools/common/toolSchemaNormalizer'; import { ContributedToolName, ToolName } from '../../tools/common/toolNames'; import { IToolsService } from '../../tools/common/toolsService'; import { applyPatch5Description } from '../../tools/node/applyPatchTool'; @@ -62,8 +63,7 @@ import { replaceStringBatchDescription } from '../../tools/node/replaceStringToo import { getAgentMaxRequests } from '../common/agentConfig'; import { addCacheBreakpoints } from './cacheBreakpoints'; import { EditCodeIntent, EditCodeIntentInvocation, EditCodeIntentInvocationOptions, mergeMetadata, toNewChatReferences } from './editCodeIntent'; - -const INLINE_SUMMARIZATION_BUDGET_EXPANSION = 1.15; +import { ToolCallingLoop } from './toolCallingLoop'; function isResponsesCompactionContextManagementEnabled(endpoint: IChatEndpoint, configurationService: IConfigurationService, experimentationService: IExperimentationService): boolean { return endpoint.apiType === 'responses' @@ -356,6 +356,9 @@ export class AgentIntentInvocation extends EditCodeIntentInvocation implements I private _lastRenderTokenCount: number = 0; + /** Cached model capabilities from the most recent main agent render, reused by the background summarizer. */ + private _lastModelCapabilities: { enableThinking: boolean; reasoningEffort: string | undefined; enableToolSearch: boolean; enableContextEditing: boolean } | undefined; + constructor( intent: IIntent, location: ChatLocation, @@ -418,9 +421,7 @@ export class AgentIntentInvocation extends EditCodeIntentInvocation implements I const useTruncation = this.endpoint.apiType === 'responses' && this.configurationService.getConfig(ConfigKey.Advanced.UseResponsesApiTruncation); const responsesCompactionContextManagementEnabled = isResponsesCompactionContextManagementEnabled(this.endpoint, this.configurationService, this.expService); const summarizationEnabled = this.configurationService.getConfig(ConfigKey.SummarizeAgentConversationHistory) && this.prompt === AgentPrompt && !responsesCompactionContextManagementEnabled; - const inlineSummarizationEnabled = summarizationEnabled && this.configurationService.getExperimentBasedConfig(ConfigKey.Advanced.AgentHistorySummarizationInline, this.expService); - // Disable background compaction when inline summarization is active — they solve the same problem - const backgroundCompactionEnabled = summarizationEnabled && !inlineSummarizationEnabled && this.configurationService.getExperimentBasedConfig(ConfigKey.BackgroundCompaction, this.expService); + const useInlineSummarization = summarizationEnabled && this.configurationService.getExperimentBasedConfig(ConfigKey.Advanced.AgentHistorySummarizationInline, this.expService); // When tools are present, apply a 10% safety margin on the message portion // to account for tokenizer discrepancies between our tool-token counter and @@ -432,7 +433,7 @@ export class AgentIntentInvocation extends EditCodeIntentInvocation implements I const safeBudget = useTruncation ? Number.MAX_SAFE_INTEGER : messageBudget; const endpoint = toolTokens > 0 ? this.endpoint.cloneWithTokenOverride(safeBudget) : this.endpoint; - this.logService.debug(`AgentIntent: rendering with budget=${safeBudget} (baseBudget: ${baseBudget}, toolTokens: ${toolTokens}, totalTools: ${tools?.length ?? 0}, toolSearchEnabled: ${toolSearchEnabled}), summarizationEnabled=${summarizationEnabled}`); + this.logService.debug(`[Agent] rendering with budget=${safeBudget} (baseBudget: ${baseBudget}, toolTokens: ${toolTokens}, totalTools: ${tools?.length ?? 0}, toolSearchEnabled: ${toolSearchEnabled}), summarizationEnabled=${summarizationEnabled}`); let result: RenderPromptResult; const props: AgentPromptProps = { endpoint, @@ -449,94 +450,42 @@ export class AgentIntentInvocation extends EditCodeIntentInvocation implements I customizations: this._resolvedCustomizations }; - // ── Background compaction: dual-threshold approach ──────────────── + // ── Background compaction ──────────────────────────────────────── // - // Background compaction thresholds (checked post-render using the - // actual tokenCount from the current render): + // Pre-render: if a previous bg pass completed, apply it now. // - // Completed (previous bg pass) → apply the summary before rendering. + // BudgetExceeded: if bg is InProgress/Completed, wait/apply. + // Otherwise fall back to foreground summarization. // - // ≥ 95% + InProgress → block on the background compaction - // completing, then apply before rendering. + // Post-render (≥ 80% + Idle): kick off background compaction + // so it is ready for a future turn. // - // ≥ 80% + Idle (post-render) → kick off background compaction so - // it is ready for a future iteration. - // - const backgroundSummarizer = backgroundCompactionEnabled ? this._getOrCreateBackgroundSummarizer(promptContext.conversation?.sessionId) : undefined; + const backgroundSummarizer = summarizationEnabled ? this._getOrCreateBackgroundSummarizer(promptContext.conversation?.sessionId) : undefined; const contextRatio = backgroundSummarizer && baseBudget > 0 ? (this._lastRenderTokenCount + toolTokens) / baseBudget : 0; - // ── Proactive inline summarization: pre-render check ────────────── - // Use _lastRenderTokenCount (from the previous iteration) to decide - // whether to append the summarize instruction *before* the main - // render, avoiding a wasteful double-render. - // Guard: skip when a summary was already stored on the current or - // most-recent history turn — _lastRenderTokenCount is stale from the - // summarization render and would falsely re-trigger. - let proactiveInlineSummarization = false; - if (inlineSummarizationEnabled && baseBudget > 0) { - const hasRecentSummary = promptContext.toolCallRounds?.some(r => r.summary) - || promptContext.history.at(-1)?.rounds.some(r => r.summary); - if (!hasRecentSummary) { - const preRenderRatio = (this._lastRenderTokenCount + toolTokens) / baseBudget; - if (preRenderRatio >= 0.85) { - this.logService.debug(`[Agent] pre-render at ${(preRenderRatio * 100).toFixed(0)}% — proactively enabling inline summarization`); - proactiveInlineSummarization = true; - } - } - } - // Track whether we applied a summary in this iteration so we don't // immediately re-trigger background compaction in the post-render check. let summaryAppliedThisIteration = false; - // 1. If a previous background pass completed, apply its summary now. - if (backgroundCompactionEnabled && backgroundSummarizer?.state === BackgroundSummarizationState.Completed) { + // If a previous background pass completed, apply its summary now. + if (summarizationEnabled && backgroundSummarizer?.state === BackgroundSummarizationState.Completed) { const bgResult = backgroundSummarizer.consumeAndReset(); if (bgResult) { - this.logService.debug(`[Agent] applying completed background summary (roundId=${bgResult.toolCallRoundId})`); + this.logService.debug(`[ConversationHistorySummarizer] applying completed background summary (roundId=${bgResult.toolCallRoundId})`); progress.report(new ChatResponseProgressPart2(l10n.t('Compacted conversation'), async () => l10n.t('Compacted conversation'))); this._applySummaryToRounds(bgResult, promptContext); this._persistSummaryOnTurn(bgResult, promptContext, this._lastRenderTokenCount); this._sendBackgroundCompactionTelemetry('preRender', 'applied', contextRatio, promptContext); summaryAppliedThisIteration = true; } else { - this.logService.warn(`[Agent] background compaction state was Completed but consumeAndReset returned no result`); + this.logService.warn(`[ConversationHistorySummarizer] background compaction state was Completed but consumeAndReset returned no result`); this._sendBackgroundCompactionTelemetry('preRender', 'noResult', contextRatio, promptContext); this._recordBackgroundCompactionFailure(promptContext, 'preRender'); } } - // 2. At ≥ 95% — block and wait for the in-progress compaction, - // then apply the result before rendering. - if (backgroundCompactionEnabled && backgroundSummarizer && contextRatio >= 0.95 && backgroundSummarizer.state === BackgroundSummarizationState.InProgress) { - this.logService.debug(`[Agent] context at ${(contextRatio * 100).toFixed(0)}% — blocking on background compaction`); - const summaryPromise = backgroundSummarizer.waitForCompletion(); - progress.report(new ChatResponseProgressPart2(l10n.t('Compacting conversation...'), async () => { - try { await summaryPromise; } catch { } - return l10n.t('Compacted conversation'); - })); - await summaryPromise; - const bgResult = backgroundSummarizer.consumeAndReset(); - if (bgResult) { - this.logService.debug(`[Agent] background compaction completed — applying result (roundId=${bgResult.toolCallRoundId})`); - this._applySummaryToRounds(bgResult, promptContext); - this._persistSummaryOnTurn(bgResult, promptContext, this._lastRenderTokenCount); - this._sendBackgroundCompactionTelemetry('preRenderBlocked', 'applied', contextRatio, promptContext); - summaryAppliedThisIteration = true; - } else { - this.logService.debug(`[Agent] background compaction finished but produced no usable result — will attempt foreground summarization if budget exceeded`); - this._sendBackgroundCompactionTelemetry('preRenderBlocked', 'noResult', contextRatio, promptContext); - this._recordBackgroundCompactionFailure(promptContext, 'preRenderBlocked'); - // Don't attempt a foreground fallback here — the main render below - // will either succeed (context estimate was pessimistic) or throw - // BudgetExceededError, which the catch block handles with foreground - // summarization. Short-circuiting here would skip the main render - // unnecessarily when it might still fit. - } - } - // Render the prompt without summarization or cache breakpoints, using // the original endpoint (not reduced for tools/safety buffer). const renderWithoutSummarization = async (reason: string, renderProps: AgentPromptProps = props): Promise => { @@ -567,7 +516,7 @@ export class AgentIntentInvocation extends EditCodeIntentInvocation implements I const turn = promptContext.conversation?.getLatestTurn(); const previousForegroundSummary = turn?.getMetadata(SummarizedConversationHistoryMetadata); if (previousForegroundSummary?.source === 'foreground' && previousForegroundSummary.outcome && previousForegroundSummary.outcome !== 'success') { - this.logService.debug(`[Agent] ${reason}, skipping repeated foreground summarization after prior failure (${previousForegroundSummary.outcome})`); + this.logService.debug(`[ConversationHistorySummarizer] ${reason}, skipping repeated foreground summarization after prior failure (${previousForegroundSummary.outcome})`); /* __GDPR__ "triggerSummarizeSkipped" : { "owner": "bhavyau", @@ -581,7 +530,7 @@ export class AgentIntentInvocation extends EditCodeIntentInvocation implements I return renderWithoutSummarization(`skipping repeated foreground summarization after prior failure (${previousForegroundSummary.outcome})`, renderProps); } - this.logService.debug(`[Agent] ${reason}, triggering summarization`); + this.logService.debug(`[ConversationHistorySummarizer] ${reason}, triggering summarization`); try { const renderer = PromptRenderer.create(this.instantiationService, this.endpoint, this.prompt, { ...renderProps, @@ -591,7 +540,7 @@ export class AgentIntentInvocation extends EditCodeIntentInvocation implements I }); return await renderer.render(progress, token); } catch (e) { - this.logService.error(e, `[Agent] summarization failed`); + this.logService.error(e, `[ConversationHistorySummarizer] summarization failed`); const errorKind = e instanceof BudgetExceededError ? 'budgetExceeded' : 'error'; /* __GDPR__ "triggerSummarizeFailed" : { @@ -621,36 +570,10 @@ export class AgentIntentInvocation extends EditCodeIntentInvocation implements I } }; - // Helper function for inline summarization — appends summarize instruction - // as a user message in the agent loop instead of making a separate LLM call. - // Returns the render result with InlineSummarizationRequestedMetadata set. - const renderWithInlineSummarization = async (reason: string, renderProps: AgentPromptProps = props): Promise => { - this.logService.debug(`[Agent] ${reason}, triggering inline summarization`); - try { - // Expand from the *base* endpoint (not renderProps.endpoint which may already be expanded) - const expandedEndpoint = endpoint.cloneWithTokenOverride(endpoint.modelMaxPromptTokens * INLINE_SUMMARIZATION_BUDGET_EXPANSION); - const renderer = PromptRenderer.create(this.instantiationService, expandedEndpoint, this.prompt, { - ...renderProps, - endpoint: expandedEndpoint, - inlineSummarization: true, - }); - return await renderer.render(progress, token); - } catch (e) { - this.logService.error(e, `[Agent] inline summarization render failed, falling back to separate-call summarization`); - return await renderWithSummarization(`inline summarization failed (${e instanceof Error ? e.message : e}), falling back`, renderProps); - } - }; - const contextLengthBefore = this._lastRenderTokenCount; try { - const renderEndpoint = proactiveInlineSummarization - ? endpoint.cloneWithTokenOverride(endpoint.modelMaxPromptTokens * INLINE_SUMMARIZATION_BUDGET_EXPANSION) - : endpoint; - const renderProps: AgentPromptProps = proactiveInlineSummarization - ? { ...props, endpoint: renderEndpoint, inlineSummarization: true } - : props; - const renderer = PromptRenderer.create(this.instantiationService, renderEndpoint, this.prompt, renderProps); + const renderer = PromptRenderer.create(this.instantiationService, endpoint, this.prompt, props); result = await renderer.render(progress, token); } catch (e) { if (e instanceof BudgetExceededError && summarizationEnabled) { @@ -670,7 +593,7 @@ export class AgentIntentInvocation extends EditCodeIntentInvocation implements I let budgetExceededTrigger: string; if (backgroundSummarizer.state === BackgroundSummarizationState.InProgress) { budgetExceededTrigger = 'budgetExceededWaited'; - this.logService.debug(`[Agent] budget exceeded — waiting on in-progress background compaction instead of new request`); + this.logService.debug(`[ConversationHistorySummarizer] budget exceeded — waiting on in-progress background compaction instead of new request`); const summaryPromise = backgroundSummarizer.waitForCompletion(); progress.report(new ChatResponseProgressPart2(l10n.t('Compacting conversation...'), async () => { try { await summaryPromise; } catch { } @@ -679,12 +602,12 @@ export class AgentIntentInvocation extends EditCodeIntentInvocation implements I await summaryPromise; } else { budgetExceededTrigger = 'budgetExceededReady'; - this.logService.debug(`[Agent] budget exceeded — applying already-completed background compaction`); + this.logService.debug(`[ConversationHistorySummarizer] budget exceeded — applying already-completed background compaction`); progress.report(new ChatResponseProgressPart2(l10n.t('Compacted conversation'), async () => l10n.t('Compacted conversation'))); } const bgResult = backgroundSummarizer.consumeAndReset(); if (bgResult) { - this.logService.debug(`[Agent] background compaction applied after budget exceeded (roundId=${bgResult.toolCallRoundId})`); + this.logService.debug(`[ConversationHistorySummarizer] background compaction applied after budget exceeded (roundId=${bgResult.toolCallRoundId})`); this._applySummaryToRounds(bgResult, promptContext); this._persistSummaryOnTurn(bgResult, promptContext, contextLengthBefore); this._sendBackgroundCompactionTelemetry(budgetExceededTrigger, 'applied', contextRatio, promptContext); @@ -693,14 +616,12 @@ export class AgentIntentInvocation extends EditCodeIntentInvocation implements I const renderer = PromptRenderer.create(this.instantiationService, endpoint, this.prompt, { ...props, promptContext }); result = await renderer.render(progress, token); } else { - this.logService.debug(`[Agent] background compaction produced no usable result after budget exceeded — falling back to synchronous summarization`); + this.logService.debug(`[ConversationHistorySummarizer] background compaction produced no usable result after budget exceeded — falling back to synchronous summarization`); this._sendBackgroundCompactionTelemetry(budgetExceededTrigger, 'noResult', contextRatio, promptContext); this._recordBackgroundCompactionFailure(promptContext, budgetExceededTrigger); // Background compaction failed — fall back to synchronous summarization result = await renderWithSummarization(`budget exceeded(${e.message}), background compaction failed`); } - } else if (inlineSummarizationEnabled) { - result = await renderWithInlineSummarization(`budget exceeded(${e.message})`); } else { result = await renderWithSummarization(`budget exceeded(${e.message})`); } @@ -734,47 +655,27 @@ export class AgentIntentInvocation extends EditCodeIntentInvocation implements I )); } - // 3. Post-render background compaction checks. - if (backgroundCompactionEnabled && backgroundSummarizer && !summaryAppliedThisIteration) { + // Post-render: kick off background compaction at ≥ 80% if idle. + if (summarizationEnabled && backgroundSummarizer && !summaryAppliedThisIteration) { const postRenderRatio = baseBudget > 0 ? (result.tokenCount + toolTokens) / baseBudget : 0; - if (postRenderRatio >= 0.95 && backgroundSummarizer.state === BackgroundSummarizationState.InProgress) { - // At ≥ 95% with a background compaction already running — block, - // wait for it, apply the result, and re-render so the LLM gets - // the compacted prompt instead of the oversized one. - this.logService.debug(`[Agent] post-render at ${(postRenderRatio * 100).toFixed(0)}% — blocking on in-progress background compaction`); - const summaryPromise = backgroundSummarizer.waitForCompletion(); - progress.report(new ChatResponseProgressPart2(l10n.t('Compacting conversation...'), async () => { - try { await summaryPromise; } catch { } - return l10n.t('Compacted conversation'); - })); - await summaryPromise; - const bgResult = backgroundSummarizer.consumeAndReset(); - if (bgResult) { - this.logService.debug(`[Agent] post-render background compaction completed — applying result and re-rendering (roundId=${bgResult.toolCallRoundId})`); - this._applySummaryToRounds(bgResult, promptContext); - this._persistSummaryOnTurn(bgResult, promptContext, result.tokenCount); - this._sendBackgroundCompactionTelemetry('postRenderBlocked', 'applied', postRenderRatio, promptContext); - // Re-render with compacted history so the LLM receives the smaller prompt - const reRenderer = PromptRenderer.create(this.instantiationService, endpoint, this.prompt, { ...props, promptContext }); - result = await reRenderer.render(progress, token); - this._lastRenderTokenCount = result.tokenCount; - } else { - this.logService.debug(`[Agent] post-render background compaction finished but produced no usable result — falling back to foreground summarization`); - this._sendBackgroundCompactionTelemetry('postRenderBlocked', 'noResult', postRenderRatio, promptContext); - this._recordBackgroundCompactionFailure(promptContext, 'postRenderBlocked'); - try { - result = await renderWithSummarization('post-render background compaction noResult fallback'); - this._lastRenderTokenCount = result.tokenCount; - } catch (e) { - this.logService.error(e, `[Agent] post-render foreground summarization fallback also failed — using original render result`); - } + if (postRenderRatio >= 0.80 && (backgroundSummarizer.state === BackgroundSummarizationState.Idle || backgroundSummarizer.state === BackgroundSummarizationState.Failed)) { + if (useInlineSummarization) { + // Compute and cache model capabilities from the current render's + // messages. These must match the main agent fetch for cache parity. + const strippedMessages = ToolCallingLoop.stripInternalToolCallIds(result.messages); + const rawEffort = this.request.modelConfiguration?.reasoningEffort; + const isSubagent = !!this.request.subAgentInvocationId; + this._lastModelCapabilities = { + enableThinking: !isAnthropicFamily(this.endpoint) || ToolCallingLoop.messagesContainThinking(strippedMessages), + reasoningEffort: typeof rawEffort === 'string' ? rawEffort : undefined, + enableToolSearch: !isSubagent && isAnthropicToolSearchEnabled(this.endpoint, this.configurationService), + enableContextEditing: !isSubagent && isAnthropicContextEditingEnabled(this.endpoint, this.configurationService, this.expService), + }; } - } else if (postRenderRatio >= 0.80 && (backgroundSummarizer.state === BackgroundSummarizationState.Idle || backgroundSummarizer.state === BackgroundSummarizationState.Failed)) { - // At ≥ 80% with no running compaction (or a previous failure) — kick off background work. - this._startBackgroundSummarization(backgroundSummarizer, props, token, postRenderRatio); + this._startBackgroundSummarization(backgroundSummarizer, result.messages, promptContext, props, token, postRenderRatio, useInlineSummarization); } } @@ -841,52 +742,239 @@ export class AgentIntentInvocation extends EditCodeIntentInvocation implements I private _startBackgroundSummarization( backgroundSummarizer: BackgroundSummarizer, + mainRenderMessages: Raw.ChatMessage[], + promptContext: IBuildPromptContext, props: AgentPromptProps, token: vscode.CancellationToken, contextRatio: number, + useInlineSummarization: boolean, ): void { - this.logService.debug(`[Agent] context at ${(contextRatio * 100).toFixed(0)}% — starting background compaction`); - // Deep-copy toolCallRounds and toolCallResults so the background render - // sees a frozen snapshot and doesn't drift as the main loop adds rounds. - const snapshotProps: AgentPromptProps = { - ...props, - promptContext: { - ...props.promptContext, - toolCallRounds: props.promptContext.toolCallRounds ? [...props.promptContext.toolCallRounds] : undefined, - toolCallResults: props.promptContext.toolCallResults ? { ...props.promptContext.toolCallResults } : undefined, - } - }; - const bgRenderer = PromptRenderer.create(this.instantiationService, this.endpoint, this.prompt, { - ...snapshotProps, - endpoint: this.endpoint, - promptContext: snapshotProps.promptContext, - triggerSummarize: true, - summarizationSource: 'background', - }); - const bgProgress: vscode.Progress = { report: () => { } }; + this.logService.debug(`[ConversationHistorySummarizer] context at ${(contextRatio * 100).toFixed(0)}% — starting background compaction (inline=${useInlineSummarization})`); + const bgStartTime = Date.now(); + + // Snapshot rounds so telemetry reflects state at kick-off time, not at + // completion time (the main loop mutates toolCallRounds). History is + // stable across a single user turn so a reference is sufficient. + const rounds = [...(promptContext.toolCallRounds ?? [])]; + const history = promptContext.history; + let toolCallRoundId: string | undefined; + if (rounds.length >= 2) { + // Mark the round before the last, preserving the last round verbatim + toolCallRoundId = rounds[rounds.length - 2].id; + } else if (rounds.length === 1) { + toolCallRoundId = rounds[0].id; + } else { + for (let i = history.length - 1; i >= 0 && !toolCallRoundId; i--) { + const lastRound = history[i].rounds.at(-1); + if (lastRound) { + toolCallRoundId = lastRound.id; + } + } + } + + // Build tool schemas matching the main agent loop so the prompt + // prefix (system + tools + messages) is identical for cache hits. + const availableTools = promptContext.tools?.availableTools; + const normalizedTools = availableTools?.length ? normalizeToolSchema( + this.endpoint.family, + availableTools.map(tool => ({ + function: { + name: tool.name, + description: tool.description, + parameters: tool.inputSchema && Object.keys(tool.inputSchema).length ? tool.inputSchema : undefined + }, + type: 'function' as const, + })), + (tool, rule) => { + this.logService.warn(`[ConversationHistorySummarizer] Tool ${tool} failed validation: ${rule}`); + }, + ) : undefined; + const toolOpts = normalizedTools?.length ? { + tools: normalizedTools, + } : undefined; + + const associatedRequestId = promptContext.conversation?.getLatestTurn()?.id; + const conversationId = promptContext.conversation?.sessionId; + const modelCapabilities = this._lastModelCapabilities; + backgroundSummarizer.start(async bgToken => { try { - const bgRenderResult = await bgRenderer.render(bgProgress, bgToken); - const summaryMetadata = bgRenderResult.metadata.get(SummarizedConversationHistoryMetadata); - if (!summaryMetadata) { - throw new Error('Background compaction produced no summary metadata'); + if (useInlineSummarization) { + // Inline mode: fork the exact messages from the main render + // and append a summary user message. The prompt prefix is + // byte-identical to the main agent loop for cache hits. + const strippedMainMessages = ToolCallingLoop.stripInternalToolCallIds(mainRenderMessages); + const summaryMsgResult = await renderPromptElement( + this.instantiationService, + this.endpoint, + InlineSummarizationUserMessage, + { endpoint: this.endpoint }, + undefined, + bgToken, + ); + const messages = [ + ...strippedMainMessages, + ...summaryMsgResult.messages, + ]; + + const response = await this.endpoint.makeChatRequest2({ + debugName: 'summarizeConversationHistory-inline', + messages, + finishedCb: undefined, + location: ChatLocation.Agent, + conversationId, + requestOptions: { + temperature: 0, + stream: false, + ...toolOpts, + }, + modelCapabilities, + telemetryProperties: associatedRequestId ? { associatedRequestId } : undefined, + enableRetryOnFilter: true, + }, bgToken); + if (response.type !== ChatFetchResponseType.Success) { + throw new Error(`Background inline summarization request failed: ${response.type}`); + } + const summaryText = extractInlineSummary(response.value); + if (!summaryText) { + throw new Error('Background inline summarization: no tags found in response'); + } + if (!toolCallRoundId) { + throw new Error('Background inline summarization: no round ID to apply summary to'); + } + this.logService.debug(`[ConversationHistorySummarizer] background inline compaction completed (${summaryText.length} chars, roundId=${toolCallRoundId})`); + + // Send summarizedConversationHistory telemetry for parity + // with the standard ConversationHistorySummarizer path. + const numRoundsInHistory = history.reduce((sum, t) => sum + t.rounds.length, 0); + const numRoundsInCurrentTurn = rounds.length; + const lastUsedTool = rounds.at(-1)?.toolCalls?.at(-1)?.name + ?? history.at(-1)?.rounds.at(-1)?.toolCalls?.at(-1)?.name ?? 'none'; + /* __GDPR__ + "summarizedConversationHistory" : { + "owner": "bhavyau", + "comment": "Tracks background inline summarization outcome", + "outcome": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The success state." }, + "model": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The model ID." }, + "summarizationMode": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The summarization mode." }, + "source": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Whether background or foreground." }, + "conversationId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Session id." }, + "chatRequestId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The chat request ID." }, + "lastUsedTool": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The last tool used before summarization." }, + "requestId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The request ID from the summarization call." }, + "numRounds": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Total tool call rounds." }, + "turnIndex": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "The index of the current turn." }, + "curTurnRoundIndex": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "The index of the current round within the current turn." }, + "isDuringToolCalling": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Whether this was triggered during tool calling." }, + "duration": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Duration in ms." }, + "promptTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "isMeasurement": true, "comment": "Prompt tokens." }, + "promptCacheTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "isMeasurement": true, "comment": "Cached prompt tokens." }, + "responseTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "isMeasurement": true, "comment": "Output tokens." } + } + */ + this.telemetryService.sendMSFTTelemetryEvent('summarizedConversationHistory', { + outcome: 'success', + model: this.endpoint.model, + summarizationMode: 'inline', + source: 'background', + conversationId, + chatRequestId: associatedRequestId, + lastUsedTool, + requestId: response.requestId, + }, { + numRounds: numRoundsInHistory + numRoundsInCurrentTurn, + turnIndex: history.length, + curTurnRoundIndex: numRoundsInCurrentTurn, + isDuringToolCalling: numRoundsInCurrentTurn > 0 ? 1 : 0, + duration: Date.now() - bgStartTime, + promptTokenCount: response.usage?.prompt_tokens, + promptCacheTokenCount: response.usage?.prompt_tokens_details?.cached_tokens, + responseTokenCount: response.usage?.completion_tokens, + }); + + return { + summary: summaryText, + toolCallRoundId, + promptTokens: response.usage?.prompt_tokens, + promptCacheTokens: response.usage?.prompt_tokens_details?.cached_tokens, + outputTokens: response.usage?.completion_tokens, + durationMs: Date.now() - bgStartTime, + model: this.endpoint.model, + summarizationMode: 'inline', + numRounds: undefined, + numRoundsSinceLastSummarization: undefined, + }; + } else { + // Standard mode: use triggerSummarize which makes a separate + // LLM call with a summarization-specific prompt during render. + const snapshotProps: AgentPromptProps = { + ...props, + promptContext: { + ...promptContext, + toolCallRounds: promptContext.toolCallRounds ? [...promptContext.toolCallRounds] : undefined, + toolCallResults: promptContext.toolCallResults ? { ...promptContext.toolCallResults } : undefined, + } + }; + const bgRenderer = PromptRenderer.create(this.instantiationService, this.endpoint, this.prompt, { + ...snapshotProps, + endpoint: this.endpoint, + promptContext: snapshotProps.promptContext, + triggerSummarize: true, + summarizationSource: 'background', + }); + const bgProgress: vscode.Progress = { report: () => { } }; + const bgRenderResult = await bgRenderer.render(bgProgress, bgToken); + const summaryMetadata = bgRenderResult.metadata.get(SummarizedConversationHistoryMetadata); + if (!summaryMetadata) { + throw new Error('Background compaction produced no summary metadata'); + } + this.logService.debug(`[ConversationHistorySummarizer] background compaction completed successfully (roundId=${summaryMetadata.toolCallRoundId})`); + return { + summary: summaryMetadata.text, + toolCallRoundId: summaryMetadata.toolCallRoundId, + promptTokens: summaryMetadata.usage?.prompt_tokens, + promptCacheTokens: summaryMetadata.usage?.prompt_tokens_details?.cached_tokens, + outputTokens: summaryMetadata.usage?.completion_tokens, + durationMs: Date.now() - bgStartTime, + model: summaryMetadata.model, + summarizationMode: summaryMetadata.summarizationMode, + numRounds: summaryMetadata.numRounds, + numRoundsSinceLastSummarization: summaryMetadata.numRoundsSinceLastSummarization, + }; } - this.logService.debug(`[Agent] background compaction completed successfully (roundId=${summaryMetadata.toolCallRoundId})`); - return { - summary: summaryMetadata.text, - toolCallRoundId: summaryMetadata.toolCallRoundId, - promptTokens: summaryMetadata.usage?.prompt_tokens, - promptCacheTokens: summaryMetadata.usage?.prompt_tokens_details?.cached_tokens, - outputTokens: summaryMetadata.usage?.completion_tokens, - durationMs: Date.now() - bgStartTime, - model: summaryMetadata.model, - summarizationMode: summaryMetadata.summarizationMode, - numRounds: summaryMetadata.numRounds, - numRoundsSinceLastSummarization: summaryMetadata.numRoundsSinceLastSummarization, - }; } catch (err) { - this.logService.error(err, `[Agent] background compaction failed`); + this.logService.error(err, `[ConversationHistorySummarizer] background compaction failed`); + + // Send failure telemetry for inline background summarization + if (useInlineSummarization) { + /* __GDPR__ + "summarizedConversationHistory" : { + "owner": "bhavyau", + "comment": "Tracks background inline summarization failure", + "outcome": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The success state." }, + "detailedOutcome": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Detailed failure reason." }, + "model": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The model ID." }, + "summarizationMode": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The summarization mode." }, + "source": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Whether background or foreground." }, + "conversationId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Session id." }, + "chatRequestId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The chat request ID." }, + "duration": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Duration in ms." } + } + */ + this.telemetryService.sendMSFTTelemetryEvent('summarizedConversationHistory', { + outcome: 'failed', + detailedOutcome: err instanceof Error ? err.message : String(err), + model: this.endpoint.model, + summarizationMode: 'inline', + source: 'background', + conversationId, + chatRequestId: associatedRequestId, + }, { + duration: Date.now() - bgStartTime, + }); + } + throw err; } }, token); @@ -924,7 +1012,7 @@ export class AgentIntentInvocation extends EditCodeIntentInvocation implements I } } if (!found) { - this.logService.warn(`[Agent] background compaction round ${bgResult.toolCallRoundId} not found in toolCallRounds or history — summary dropped`); + this.logService.warn(`[ConversationHistorySummarizer] background compaction round ${bgResult.toolCallRoundId} not found in toolCallRounds or history — summary dropped`); } } // Invalidate the auto mode router cache so the next getChatEndpoint() diff --git a/extensions/copilot/src/extension/intents/node/toolCallingLoop.ts b/extensions/copilot/src/extension/intents/node/toolCallingLoop.ts index b7e8b4d3cca..29737cdef4d 100644 --- a/extensions/copilot/src/extension/intents/node/toolCallingLoop.ts +++ b/extensions/copilot/src/extension/intents/node/toolCallingLoop.ts @@ -48,7 +48,7 @@ import { ThinkingDataItem, ToolCallRound } from '../../prompt/common/toolCallRou import { IBuildPromptResult, IResponseProcessor } from '../../prompt/node/intents'; import { PseudoStopStartResponseProcessor } from '../../prompt/node/pseudoStartStopConversationCallback'; import { ResponseProcessorContext } from '../../prompt/node/responseProcessorContext'; -import { extractInlineSummary, InlineSummarizationRequestedMetadata, SummarizedConversationHistoryMetadata } from '../../prompts/node/agent/summarizedConversationHistory'; +import { SummarizedConversationHistoryMetadata } from '../../prompts/node/agent/summarizedConversationHistory'; import { ToolFailureEncountered, ToolResultMetadata } from '../../prompts/node/panel/toolCalling'; import { ToolName } from '../../tools/common/toolNames'; import { IToolsService, ToolCallCancelledError } from '../../tools/common/toolsService'; @@ -355,9 +355,6 @@ export abstract class ToolCallingLoop | undefined; - private inlineSummarizationProgressDeferred: DeferredPromise | undefined; - /** Set to true before calling fetch() when the current iteration is an inline summarization request. */ - protected _isInlineSummarizationRequest = false; /** * Autopilot stop hook — the model needs to call `task_complete` to signal it's done. @@ -913,145 +910,6 @@ export abstract class ToolCallingLoop sum + t.rounds.length, 0); - const numRoundsInCurrentTurn = toolCallRoundsForTelemetry.length; - const numRounds = numRoundsInHistory + numRoundsInCurrentTurn; - const lastUsedTool = toolCallRoundsForTelemetry.at(-1)?.toolCalls.at(-1)?.name - ?? history.at(-1)?.rounds.at(-1)?.toolCalls.at(-1)?.name ?? 'none'; - - // Compute rounds since last summarization (same logic as ConversationHistorySummarizer) - let numRoundsSinceLastSummarization = -1; - for (let ri = toolCallRoundsForTelemetry.length - 1; ri >= 0; ri--) { - if (toolCallRoundsForTelemetry[ri].summary) { - numRoundsSinceLastSummarization = toolCallRoundsForTelemetry.length - 1 - ri; - break; - } - } - if (numRoundsSinceLastSummarization === -1) { - let count = numRoundsInCurrentTurn; - outerLoop: for (let ti = history.length - 1; ti >= 0; ti--) { - for (let ri = history[ti].rounds.length - 1; ri >= 0; ri--) { - if (history[ti].rounds[ri].summary) { - numRoundsSinceLastSummarization = count; - break outerLoop; - } - count++; - } - } - } - - const inlineSummarizationMeta = new SummarizedConversationHistoryMetadata( - summarizedRound, - summaryText, - { - usage, - model: resolvedModel, - summarizationMode: 'inline', - numRounds, - numRoundsSinceLastSummarization, - source: 'foreground', - outcome: 'success', - }, - ); - turn.setMetadata(inlineSummarizationMeta); - - // Fire telemetry matching the existing summarizedConversationHistory event - /* __GDPR__ - "summarizedConversationHistory" : { - "owner": "bhavyau", - "comment": "Tracks inline summarization", - "outcome": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The success state." }, - "model": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The model ID." }, - "summarizationMode": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The summarization mode." }, - "source": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Whether background or foreground." }, - "conversationId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Session id." }, - "chatRequestId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The chat request ID." }, - "lastUsedTool": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The last tool used before summarization." }, - "requestId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The request ID from the summarization call." }, - "numRounds": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Total tool call rounds." }, - "numRoundsSinceLastSummarization": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Rounds since last summarization." }, - "turnIndex": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "The index of the current turn." }, - "curTurnRoundIndex": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "The index of the current round within the current turn." }, - "isDuringToolCalling": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Whether this was triggered during tool calling." }, - "promptTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "isMeasurement": true, "comment": "Prompt tokens." }, - "promptCacheTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "isMeasurement": true, "comment": "Cached prompt tokens." }, - "responseTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "isMeasurement": true, "comment": "Output tokens." } - } - */ - this._telemetryService.sendMSFTTelemetryEvent('summarizedConversationHistory', { - outcome: 'success', - model: resolvedModel, - summarizationMode: 'inline', - source: 'foreground', - conversationId: this.options.conversation.sessionId, - chatRequestId: turn.id, - lastUsedTool, - requestId: result.response.requestId, - }, { - numRounds, - numRoundsSinceLastSummarization, - turnIndex: history.length, - curTurnRoundIndex: numRoundsInCurrentTurn, - isDuringToolCalling: numRoundsInCurrentTurn > 0 ? 1 : 0, - promptTokenCount: usage?.prompt_tokens, - promptCacheTokenCount: usage?.prompt_tokens_details?.cached_tokens, - responseTokenCount: usage?.completion_tokens, - }); - GenAiMetrics.incrementAgentSummarizationCount(this._otelService, 'success'); - - this._logService.info(`[ToolCallingLoop] Inline summarization extracted (${summaryText.length} chars, roundId=${summarizedRound}), continuing loop`); - - // Remove the summarization round — it served its purpose - // and shouldn't be rendered as an assistant message in - // subsequent iterations (otherwise the model sees both - // the compacted AND the raw - // ...... response). - this.toolCallRounds.pop(); - - // Resolve the "Compacting conversation..." progress to show "Compacted conversation" - this.inlineSummarizationProgressDeferred?.complete(undefined); - this.inlineSummarizationProgressDeferred = undefined; - continue; - } else { - this._logService.warn(`[ToolCallingLoop] Inline summarization: no round found to store summary on`); - this._sendInlineSummarizationFailureTelemetry('noRoundFound', result.response); - GenAiMetrics.incrementAgentSummarizationCount(this._otelService, 'failed'); - this.inlineSummarizationProgressDeferred?.complete(undefined); - this.inlineSummarizationProgressDeferred = undefined; - // Fall through to normal no-tool-calls handling (will break the loop) - } - } else { - this._logService.warn(`[ToolCallingLoop] Inline summarization requested but no summary extracted from response`); - this._sendInlineSummarizationFailureTelemetry('extractionFailed', result.response); - GenAiMetrics.incrementAgentSummarizationCount(this._otelService, 'failed'); - this.inlineSummarizationProgressDeferred?.complete(undefined); - this.inlineSummarizationProgressDeferred = undefined; - // Fall through to normal no-tool-calls handling (will break the loop) - } - } - } - // If the model produced productive (non-task_complete) tool calls after being nudged, // reset the stop hook flag and iteration count so it can be nudged again. if (this.autopilotStopHookActive && result.round.toolCalls.length && !result.round.toolCalls.some(tc => tc.name === ToolCallingLoop.TASK_COMPLETE_TOOL_NAME)) { @@ -1133,8 +991,6 @@ export abstract class ToolCallingLoop(); - this.inlineSummarizationProgressDeferred = deferred; - outputStream?.progress(l10n.t('Compacting conversation...'), async () => { - await deferred.p; - return l10n.t('Compacted conversation'); - }); - } const endpoint = await this._endpointProvider.getChatEndpoint(this.options.request); const tokenizer = endpoint.acquireTokenizer(); @@ -1381,14 +1224,11 @@ export abstract class ToolCallingLoop tags) is not shown to the user. - const effectiveOutputStream = inlineSummarizationRequested ? undefined : outputStream; - const streamParticipants = effectiveOutputStream ? [effectiveOutputStream] : []; + const streamParticipants = outputStream ? [outputStream] : []; let fetchStreamSource: FetchStreamSource | undefined; let processResponsePromise: Promise | undefined; let stopEarly = false; - if (effectiveOutputStream) { + if (outputStream) { this.options.streamParticipants?.forEach(fn => { streamParticipants.push(fn(streamParticipants[streamParticipants.length - 1])); }); @@ -1428,7 +1268,6 @@ export abstract class ToolCallingLoop 2) { - // 3+ rounds: mark the one before the last real round, preserving rK verbatim - rounds[rounds.length - 3].summary = summaryText; - return rounds[rounds.length - 3].id; - } else if (rounds.length > 1) { - // 2 rounds (one real + summaryRound): mark the real round - rounds[rounds.length - 2].summary = summaryText; - return rounds[rounds.length - 2].id; - } - return undefined; - } - - /** - * Fires a `summarizedConversationHistory` telemetry event for inline summarization failures, - * matching the format of the existing `ConversationHistorySummarizer.sendSummarizationTelemetry()`. - */ - private _sendInlineSummarizationFailureTelemetry(detailedOutcome: string, response: ChatResponse): void { - const history = this.options.conversation.turns.slice(0, -1); - const numRoundsInHistory = history.reduce((sum, t) => sum + t.rounds.length, 0); - const numRoundsInCurrentTurn = this.toolCallRounds.length; - const resolvedModel = response.type === ChatFetchResponseType.Success ? response.resolvedModel : undefined; - const requestId = response.type === ChatFetchResponseType.Success ? response.requestId : ''; - const usage = response.type === ChatFetchResponseType.Success ? response.usage : undefined; - - /* __GDPR__ - "summarizedConversationHistory" : { - "owner": "bhavyau", - "comment": "Tracks inline summarization failure", - "outcome": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The success state." }, - "detailedOutcome": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Detailed failure reason." }, - "model": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The model ID." }, - "summarizationMode": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The summarization mode." }, - "source": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Whether background or foreground." }, - "conversationId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Session id." }, - "chatRequestId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The chat request ID." }, - "requestId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The request ID from the summarization call." }, - "numRounds": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Total tool call rounds." }, - "turnIndex": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "The index of the current turn." }, - "curTurnRoundIndex": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "The index of the current round within the current turn." }, - "promptTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "isMeasurement": true, "comment": "Prompt tokens." }, - "promptCacheTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "isMeasurement": true, "comment": "Cached prompt tokens." }, - "responseTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "isMeasurement": true, "comment": "Output tokens." } - } - */ - this._telemetryService.sendMSFTTelemetryEvent('summarizedConversationHistory', { - outcome: 'failed', - detailedOutcome, - model: resolvedModel, - summarizationMode: 'inline', - source: 'foreground', - conversationId: this.options.conversation.sessionId, - chatRequestId: this.turn.id, - requestId, - }, { - numRounds: numRoundsInHistory + numRoundsInCurrentTurn, - turnIndex: history.length, - curTurnRoundIndex: numRoundsInCurrentTurn, - promptTokenCount: usage?.prompt_tokens, - promptCacheTokenCount: usage?.prompt_tokens_details?.cached_tokens, - responseTokenCount: usage?.completion_tokens, - }); - } - private applyMessagePostProcessing(messages: Raw.ChatMessage[], options?: { stripOrphanedToolCalls?: boolean }): Raw.ChatMessage[] { return this.validateToolMessages( ToolCallingLoop.stripInternalToolCallIds(messages), options); @@ -1879,8 +1638,6 @@ export interface IToolCallSingleResult { hadIgnoredFiles: boolean; lastRequestMessages: Raw.ChatMessage[]; availableTools: readonly LanguageModelToolInformation[]; - /** Set when the prompt included inline summarization instructions. */ - inlineSummarizationRequested?: boolean; } export interface IToolCallLoopResult extends IToolCallSingleResult { diff --git a/extensions/copilot/src/extension/prompt/node/defaultIntentRequestHandler.ts b/extensions/copilot/src/extension/prompt/node/defaultIntentRequestHandler.ts index dcb3732d135..cdaa7ac654e 100644 --- a/extensions/copilot/src/extension/prompt/node/defaultIntentRequestHandler.ts +++ b/extensions/copilot/src/extension/prompt/node/defaultIntentRequestHandler.ts @@ -687,10 +687,9 @@ class DefaultToolCallingLoop extends ToolCallingLoop { protected override async fetch(opts: ToolCallingLoopFetchOptions, token: CancellationToken): Promise { const messageSourcePrefix = this.options.location === ChatLocation.Editor ? 'inline' : 'chat'; - const baseDebugName = this.options.request.subAgentInvocationId ? + const debugName = this.options.request.subAgentInvocationId ? `tool/runSubagent${this.options.request.subAgentName ? `-${this.options.request.subAgentName}` : ''}` : `${ChatLocation.toStringShorter(this.options.location)}/${this.options.intent?.id}`; - const debugName = this._isInlineSummarizationRequest ? 'inlineSummarizeConversationHistory-full' : baseDebugName; const location = this.options.overrideRequestLocation ?? this.options.location; const isThinkingLocation = location === ChatLocation.Agent || location === ChatLocation.MessagesProxy; const rawEffort = this.options.request.modelConfiguration?.reasoningEffort; diff --git a/extensions/copilot/src/extension/prompts/node/agent/agentPrompt.tsx b/extensions/copilot/src/extension/prompts/node/agent/agentPrompt.tsx index 30da7e78abc..e80f70291fb 100644 --- a/extensions/copilot/src/extension/prompts/node/agent/agentPrompt.tsx +++ b/extensions/copilot/src/extension/prompts/node/agent/agentPrompt.tsx @@ -56,14 +56,6 @@ export interface AgentPromptProps extends GenericBasePromptElementProps { readonly triggerSummarize?: boolean; - /** - * When true, appends a summarization instruction as a user message in the - * current agent loop iteration instead of making a separate LLM call. - * The model outputs ONLY a summary (no tool calls) and the loop continues - * with the compacted history on the next iteration. - */ - readonly inlineSummarization?: boolean; - /** * Enables cache breakpoints and summarization */ @@ -151,7 +143,6 @@ export class AgentPrompt extends PromptElement { | undefined; readonly enableCacheBreakpoints?: boolean; readonly workingNotebook?: NotebookDocument; @@ -420,8 +417,6 @@ export class SummarizedConversationHistory extends PromptElement | undefined, token: CancellationToken | undefined) { const promptContext = { ...this.props.promptContext }; let historyMetadata: SummarizedConversationHistoryMetadata | undefined; - const transcriptLookupEnabled = this.configurationService.getExperimentBasedConfig(ConfigKey.ConversationTranscriptLookup, this.experimentationService); - // Resolve transcript path and flush to disk so the model can read the up-to-date file let transcriptPath: string | undefined; const sessionId = this.props.promptContext.conversation?.sessionId; - if (transcriptLookupEnabled && sessionId) { + if (sessionId) { // Lazily start the transcript session now (before summarization) so it // captures the full pre-compaction conversation. startSession is // idempotent — if hooks already started it, this is a no-op. @@ -479,18 +472,12 @@ export class SummarizedConversationHistory extends PromptElement {historyMetadata && } - {inlineSummarizationRequested && } - {inlineSummarizationRequested && } ; } @@ -687,7 +674,7 @@ class ConversationHistorySummarizer { const budgetExceeded = e instanceof BudgetExceededError; const outcome = budgetExceeded ? 'budget_exceeded' : 'renderError'; this.logInfo(`Error rendering summarization prompt in mode: ${mode}. ${e.stack}`, mode); - this.sendSummarizationTelemetry(outcome, '', this.props.endpoint.model, mode, stopwatch.elapsed(), undefined); + this.sendSummarizationTelemetry(outcome, '', this.props.endpoint.model, mode, stopwatch.elapsed(), undefined, e instanceof Error ? e.message : String(e)); throw e; } @@ -704,7 +691,7 @@ class ConversationHistorySummarizer { }, type: 'function' })), (tool, rule) => { - this.logService.warn(`Tool ${tool} failed validation: ${rule}`); + this.logService.warn(`[ConversationHistorySummarizer] Tool ${tool} failed validation: ${rule}`); }, ) : undefined; const toolOpts = normalizedTools?.length ? { @@ -766,7 +753,7 @@ class ConversationHistorySummarizer { }, this.token ?? CancellationToken.None); } catch (e) { this.logInfo(`Error from summarization request. ${e.message}`, mode); - this.sendSummarizationTelemetry('requestThrow', '', this.props.endpoint.model, mode, stopwatch.elapsed(), undefined); + this.sendSummarizationTelemetry('requestThrow', '', this.props.endpoint.model, mode, stopwatch.elapsed(), undefined, e instanceof Error ? e.message : String(e)); throw e; } @@ -806,7 +793,7 @@ class ConversationHistorySummarizer { ? Math.min(this.sizing.tokenBudget, this.props.maxSummaryTokens) : this.sizing.tokenBudget; if (summarySize > effectiveBudget) { - this.sendSummarizationTelemetry('too_large', response.requestId, this.props.endpoint.model, mode, elapsedTime, response.usage); + this.sendSummarizationTelemetry('too_large', response.requestId, this.props.endpoint.model, mode, elapsedTime, response.usage, `${summarySize} tokens exceeds budget ${effectiveBudget}`); this.logInfo(`Summary too large: ${summarySize} tokens (effective budget ${effectiveBudget})`, mode); throw new Error('Summary too large'); } @@ -1072,14 +1059,7 @@ class SummaryMessageElement extends PromptElement { } } -/** - * Metadata flag indicating that inline summarization was requested in this render. - * The caller (agentIntent) checks for this to know the model response should - * contain only a summary. - */ -export class InlineSummarizationRequestedMetadata extends PromptMetadata { } - -interface InlineSummarizationUserMessageProps extends BasePromptElementProps { +export interface InlineSummarizationUserMessageProps extends BasePromptElementProps { readonly endpoint: IChatEndpoint; } @@ -1089,7 +1069,7 @@ interface InlineSummarizationUserMessageProps extends BasePromptElementProps { * no tool calls. The summary is extracted from the response and stored on the round * for the next iteration. */ -class InlineSummarizationUserMessage extends PromptElement { +export class InlineSummarizationUserMessage extends PromptElement { override async render(state: void, sizing: PromptSizing) { const isOpus = this.props.endpoint.model.startsWith('claude-opus'); return diff --git a/extensions/copilot/src/extension/prompts/node/agent/test/summarization.spec.tsx b/extensions/copilot/src/extension/prompts/node/agent/test/summarization.spec.tsx index 6e362489200..e436572444e 100644 --- a/extensions/copilot/src/extension/prompts/node/agent/test/summarization.spec.tsx +++ b/extensions/copilot/src/extension/prompts/node/agent/test/summarization.spec.tsx @@ -30,7 +30,7 @@ import { ToolName } from '../../../../tools/common/toolNames'; import { PromptRenderer } from '../../base/promptRenderer'; import { AgentPrompt, AgentPromptProps } from '../agentPrompt'; import { PromptRegistry } from '../promptRegistry'; -import { ConversationHistorySummarizationPrompt, extractInlineSummary, InlineSummarizationRequestedMetadata, stripToolSearchMessages, SummarizedConversationHistory, SummarizedConversationHistoryMetadata, SummarizedConversationHistoryPropsBuilder } from '../summarizedConversationHistory'; +import { ConversationHistorySummarizationPrompt, extractInlineSummary, stripToolSearchMessages, SummarizedConversationHistory, SummarizedConversationHistoryMetadata, SummarizedConversationHistoryPropsBuilder } from '../summarizedConversationHistory'; suite('Agent Summarization', () => { let accessor: ITestingServicesAccessor; @@ -582,162 +582,6 @@ suite('extractInlineSummary', () => { }); }); -suite('Inline Summarization Prompt', () => { - let accessor: ITestingServicesAccessor; - - beforeAll(() => { - const services = createExtensionUnitTestingServices(); - services.define(IWorkspaceService, new SyncDescriptor( - TestWorkspaceService, - [ - [URI.file('/workspace')], - [] - ] - )); - services.define(IChatMLFetcher, new StaticChatMLFetcher([])); - accessor = services.createTestingAccessor(); - }); - - afterAll(() => { - accessor.dispose(); - }); - - test('inlineSummarization=true appends summarization user message and metadata', async () => { - const instaService = accessor.get(IInstantiationService); - const endpoint = instaService.createInstance(MockEndpoint, undefined); - const turn = new Turn('turnId', { type: 'user', message: 'hello' }); - const conversation = new Conversation('sessionId', [turn]); - - const firstTurn = new Turn('id1', { type: 'user', message: 'previous turn message' }); - firstTurn.setResponse(TurnStatus.Success, { type: 'user', message: 'response' }, 'responseId', { - metadata: { - toolCallRounds: [ - new ToolCallRound('ok', [{ - id: 'tooluse_1', - name: ToolName.EditFile, - arguments: JSON.stringify({ filePath: '/workspace/file.ts', code: 'test' }) - }]), - ] - } - } as ICopilotChatResultIn); - - const promptContext: IBuildPromptContext = { - chatVariables: new ChatVariablesCollection([]), - history: [firstTurn], - query: 'continue', - toolCallRounds: [ - new ToolCallRound('ok 2', [{ - id: 'tooluse_2', - name: ToolName.EditFile, - arguments: JSON.stringify({ filePath: '/workspace/file.ts', code: 'test2' }) - }]), - ], - toolCallResults: { - 'tooluse_2': new LanguageModelToolResult([new LanguageModelTextPart('success')]), - }, - tools: { - availableTools: [], - toolInvocationToken: null as never, - toolReferences: [], - }, - conversation, - }; - - const customizations = await PromptRegistry.resolveAllCustomizations(instaService, endpoint); - const props: AgentPromptProps = { - priority: 1, - endpoint, - location: ChatLocation.Panel, - promptContext, - enableCacheBreakpoints: true, - inlineSummarization: true, - customizations, - }; - - const renderer = PromptRenderer.create(instaService, endpoint, AgentPrompt, props); - const result = await renderer.render(); - - // Should have InlineSummarizationRequestedMetadata set - const inlineMeta = result.metadata.get(InlineSummarizationRequestedMetadata); - expect(inlineMeta).toBeDefined(); - - // The last user message should contain summarization instructions - const userMessages = result.messages.filter(m => m.role === Raw.ChatRole.User); - const lastUserMessage = userMessages[userMessages.length - 1]; - const lastMessageText = lastUserMessage.content.map(c => 'text' in c ? c.text : '').join(''); - expect(lastMessageText).toContain('summary'); - expect(lastMessageText).toContain('Do NOT call any tools'); - - // Should NOT have the separate-call summarization metadata - const summaryMeta = result.metadata.get(SummarizedConversationHistoryMetadata); - expect(summaryMeta).toBeUndefined(); - }); - - test('inlineSummarization=true sets metadata when triggerSummarize is false', async () => { - const instaService = accessor.get(IInstantiationService); - const endpoint = instaService.createInstance(MockEndpoint, undefined); - - const firstTurn = new Turn('id1', { type: 'user', message: 'previous turn message' }); - firstTurn.setResponse(TurnStatus.Success, { type: 'user', message: 'response' }, 'responseId', { - metadata: { - toolCallRounds: [ - new ToolCallRound('ok', [{ - id: 'tooluse_1', - name: ToolName.EditFile, - arguments: JSON.stringify({ filePath: '/workspace/file.ts', code: 'test' }) - }]), - ] - } - } as ICopilotChatResultIn); - - const promptContext: IBuildPromptContext = { - chatVariables: new ChatVariablesCollection([]), - history: [firstTurn], - query: 'continue', - toolCallRounds: [ - new ToolCallRound('ok 2', [{ - id: 'tooluse_2', - name: ToolName.EditFile, - arguments: JSON.stringify({ filePath: '/workspace/file.ts', code: 'test2' }) - }]), - ], - toolCallResults: { - 'tooluse_2': new LanguageModelToolResult([new LanguageModelTextPart('success')]), - }, - tools: { - availableTools: [], - toolInvocationToken: null as never, - toolReferences: [], - }, - }; - - // When both triggerSummarize and inlineSummarization are true, - // triggerSummarize should take precedence (inlineSummarization condition - // requires triggerSummarize to be false). - // We test this indirectly: inlineSummarization=true with triggerSummarize=false - // should set InlineSummarizationRequestedMetadata, but if triggerSummarize were - // also true, the inline path would be skipped. - const customizations = await PromptRegistry.resolveAllCustomizations(instaService, endpoint); - const propsInlineOnly: AgentPromptProps = { - priority: 1, - endpoint, - location: ChatLocation.Panel, - promptContext, - enableCacheBreakpoints: true, - triggerSummarize: false, - inlineSummarization: true, - customizations, - }; - - const renderer = PromptRenderer.create(instaService, endpoint, AgentPrompt, propsInlineOnly); - const result = await renderer.render(); - - // Inline metadata should be set when triggerSummarize is false - const inlineMeta = result.metadata.get(InlineSummarizationRequestedMetadata); - expect(inlineMeta).toBeDefined(); - }); -}); - suite('stripToolSearchMessages', () => { function makeAssistantMessage(toolCalls: { id: string; name: string }[], text = 'response'): Raw.ChatMessage { return { diff --git a/extensions/copilot/src/platform/configuration/common/configurationService.ts b/extensions/copilot/src/platform/configuration/common/configurationService.ts index a6ee3cbcd1e..bda44f90c2a 100644 --- a/extensions/copilot/src/platform/configuration/common/configurationService.ts +++ b/extensions/copilot/src/platform/configuration/common/configurationService.ts @@ -965,8 +965,6 @@ export namespace ConfigKey { export const NewWorkspaceCreationAgentEnabled = defineSetting('chat.newWorkspaceCreation.enabled', ConfigType.Simple, true); export const NewWorkspaceUseContext7 = defineSetting('chat.newWorkspace.useContext7', ConfigType.Simple, false); export const SummarizeAgentConversationHistory = defineSetting('chat.summarizeAgentConversationHistory.enabled', ConfigType.Simple, true); - export const ConversationTranscriptLookup = defineSetting('chat.conversationTranscriptLookup.enabled', ConfigType.ExperimentBased, false); - export const BackgroundCompaction = defineSetting('chat.backgroundCompaction', ConfigType.ExperimentBased, false); export const VirtualToolThreshold = defineSetting('chat.virtualTools.threshold', ConfigType.ExperimentBased, HARD_TOOL_LIMIT); export const CurrentEditorAgentContext = defineSetting('chat.agent.currentEditorContext.enabled', ConfigType.Simple, true); /** BYOK */