mirror of
https://github.com/microsoft/vscode.git
synced 2026-05-17 22:00:59 +01:00
Background inline summarization v1 (#308923)
* Refactor inline summarization handling in ToolCallingLoop
* Refactor conversation summarization settings and improve logging in AgentIntent
* Refactor agent intent to improve telemetry and remove obsolete test file
* Refactor inline summarization handling: remove unused properties and related tests
* Remove unused summarization instruction from AgentPromptProps interface
* Refactor AgentIntentInvocation to streamline model capabilities handling in background summarization
* Update debugName for background summarization to reflect inline context
* Update logging message in AgentIntentInvocation for clarity and remove unused test suite for inline summarization
This commit is contained in:
@@ -3224,15 +3224,6 @@
|
||||
"onExp"
|
||||
]
|
||||
},
|
||||
"github.copilot.chat.backgroundCompaction": {
|
||||
"type": "boolean",
|
||||
"default": false,
|
||||
"markdownDescription": "%github.copilot.config.backgroundCompaction%",
|
||||
"tags": [
|
||||
"preview",
|
||||
"onExp"
|
||||
]
|
||||
},
|
||||
"github.copilot.chat.anthropic.toolSearchTool.enabled": {
|
||||
"type": "boolean",
|
||||
"default": true,
|
||||
@@ -3253,15 +3244,6 @@
|
||||
"preview",
|
||||
"onExp"
|
||||
]
|
||||
},
|
||||
"github.copilot.chat.conversationTranscriptLookup.enabled": {
|
||||
"type": "boolean",
|
||||
"default": false,
|
||||
"description": "%github.copilot.config.conversationTranscriptLookup.enabled%",
|
||||
"tags": [
|
||||
"preview",
|
||||
"onExp"
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
@@ -263,7 +263,6 @@
|
||||
"github.copilot.config.editsNewNotebook.enabled": "Whether to enable the new notebook tool in Copilot Edits.",
|
||||
"github.copilot.config.notebook.inlineEditAgent.enabled": "Enable agent-like behavior from the notebook inline chat widget.",
|
||||
"github.copilot.config.summarizeAgentConversationHistory.enabled": "Whether to auto-compact agent conversation history once the context window is filled.",
|
||||
"github.copilot.config.conversationTranscriptLookup.enabled": "When enabled, after conversation history is summarized the model is informed it can look up the full conversation transcript via read_file.",
|
||||
"github.copilot.tools.createNewWorkspace.name": "Create New Workspace",
|
||||
"github.copilot.tools.openEmptyFolder.name": "Open an empty folder as VS Code workspace",
|
||||
"github.copilot.tools.getProjectSetupInfo.name": "Get Project Setup Info",
|
||||
@@ -391,9 +390,7 @@
|
||||
"github.copilot.config.instantApply.shortContextLimit": "Token limit for short context instant apply.",
|
||||
"github.copilot.config.summarizeAgentConversationHistoryThreshold": "Threshold for compacting agent conversation history.",
|
||||
"github.copilot.config.agentHistorySummarizationMode": "Mode for agent history summarization.",
|
||||
"github.copilot.config.backgroundCompaction": "Enable background compaction of conversation history.",
|
||||
"github.copilot.config.agentHistorySummarizationInline": "Summarize conversation inline within the agent loop instead of a separate LLM call, maximizing prompt cache hits.",
|
||||
|
||||
"github.copilot.config.useResponsesApiTruncation": "Use Responses API for truncation.",
|
||||
"github.copilot.config.enableReadFileV2": "Enable version 2 of the read file tool.",
|
||||
"github.copilot.config.enableAskAgent": "Enable the Ask agent for answering questions.",
|
||||
|
||||
@@ -8,7 +8,7 @@ import { Raw, RenderPromptResult } from '@vscode/prompt-tsx';
|
||||
import { BudgetExceededError } from '@vscode/prompt-tsx/dist/base/materialized';
|
||||
import type * as vscode from 'vscode';
|
||||
import { IChatSessionService } from '../../../platform/chat/common/chatSessionService';
|
||||
import { ChatLocation, ChatResponse } from '../../../platform/chat/common/commonTypes';
|
||||
import { ChatFetchResponseType, ChatLocation, ChatResponse } from '../../../platform/chat/common/commonTypes';
|
||||
import { ConfigKey, IConfigurationService } from '../../../platform/configuration/common/configurationService';
|
||||
import { isAnthropicFamily, isGptFamily, modelCanUseApplyPatchExclusively, modelCanUseReplaceStringExclusively, modelSupportsApplyPatch, modelSupportsMultiReplaceString, modelSupportsReplaceString, modelSupportsSimplifiedApplyPatchInstructions } from '../../../platform/endpoint/common/chatModelCapabilities';
|
||||
import { IEndpointProvider } from '../../../platform/endpoint/common/endpointProvider';
|
||||
@@ -16,7 +16,7 @@ import { IAutomodeService } from '../../../platform/endpoint/node/automodeServic
|
||||
import { IEnvService } from '../../../platform/env/common/envService';
|
||||
import { ILogService } from '../../../platform/log/common/logService';
|
||||
import { IEditLogService } from '../../../platform/multiFileEdit/common/editLogService';
|
||||
import { CUSTOM_TOOL_SEARCH_NAME, isAnthropicCustomToolSearchEnabled, isAnthropicToolSearchEnabled } from '../../../platform/networking/common/anthropic';
|
||||
import { CUSTOM_TOOL_SEARCH_NAME, isAnthropicCustomToolSearchEnabled, isAnthropicContextEditingEnabled, isAnthropicToolSearchEnabled } from '../../../platform/networking/common/anthropic';
|
||||
import { IChatEndpoint } from '../../../platform/networking/common/networking';
|
||||
import { modelsWithoutResponsesContextManagement } from '../../../platform/networking/common/openai';
|
||||
import { INotebookService } from '../../../platform/notebook/common/notebookService';
|
||||
@@ -47,13 +47,14 @@ import { IBuildPromptResult, IIntent, IIntentInvocation } from '../../prompt/nod
|
||||
import { AgentPrompt, AgentPromptProps } from '../../prompts/node/agent/agentPrompt';
|
||||
import { BackgroundSummarizationState, BackgroundSummarizer, IBackgroundSummarizationResult } from '../../prompts/node/agent/backgroundSummarizer';
|
||||
import { AgentPromptCustomizations, PromptRegistry } from '../../prompts/node/agent/promptRegistry';
|
||||
import { SummarizedConversationHistory, SummarizedConversationHistoryMetadata, SummarizedConversationHistoryPropsBuilder } from '../../prompts/node/agent/summarizedConversationHistory';
|
||||
import { PromptRenderer } from '../../prompts/node/base/promptRenderer';
|
||||
import { extractInlineSummary, InlineSummarizationUserMessage, SummarizedConversationHistory, SummarizedConversationHistoryMetadata, SummarizedConversationHistoryPropsBuilder } from '../../prompts/node/agent/summarizedConversationHistory';
|
||||
import { PromptRenderer, renderPromptElement } from '../../prompts/node/base/promptRenderer';
|
||||
import { ICodeMapperService } from '../../prompts/node/codeMapper/codeMapperService';
|
||||
import { EditCodePrompt2 } from '../../prompts/node/panel/editCodePrompt2';
|
||||
import { NotebookInlinePrompt } from '../../prompts/node/panel/notebookInlinePrompt';
|
||||
import { ToolResultMetadata } from '../../prompts/node/panel/toolCalling';
|
||||
import { IEditToolLearningService } from '../../tools/common/editToolLearningService';
|
||||
import { normalizeToolSchema } from '../../tools/common/toolSchemaNormalizer';
|
||||
import { ContributedToolName, ToolName } from '../../tools/common/toolNames';
|
||||
import { IToolsService } from '../../tools/common/toolsService';
|
||||
import { applyPatch5Description } from '../../tools/node/applyPatchTool';
|
||||
@@ -62,8 +63,7 @@ import { replaceStringBatchDescription } from '../../tools/node/replaceStringToo
|
||||
import { getAgentMaxRequests } from '../common/agentConfig';
|
||||
import { addCacheBreakpoints } from './cacheBreakpoints';
|
||||
import { EditCodeIntent, EditCodeIntentInvocation, EditCodeIntentInvocationOptions, mergeMetadata, toNewChatReferences } from './editCodeIntent';
|
||||
|
||||
const INLINE_SUMMARIZATION_BUDGET_EXPANSION = 1.15;
|
||||
import { ToolCallingLoop } from './toolCallingLoop';
|
||||
|
||||
function isResponsesCompactionContextManagementEnabled(endpoint: IChatEndpoint, configurationService: IConfigurationService, experimentationService: IExperimentationService): boolean {
|
||||
return endpoint.apiType === 'responses'
|
||||
@@ -356,6 +356,9 @@ export class AgentIntentInvocation extends EditCodeIntentInvocation implements I
|
||||
|
||||
private _lastRenderTokenCount: number = 0;
|
||||
|
||||
/** Cached model capabilities from the most recent main agent render, reused by the background summarizer. */
|
||||
private _lastModelCapabilities: { enableThinking: boolean; reasoningEffort: string | undefined; enableToolSearch: boolean; enableContextEditing: boolean } | undefined;
|
||||
|
||||
constructor(
|
||||
intent: IIntent,
|
||||
location: ChatLocation,
|
||||
@@ -418,9 +421,7 @@ export class AgentIntentInvocation extends EditCodeIntentInvocation implements I
|
||||
const useTruncation = this.endpoint.apiType === 'responses' && this.configurationService.getConfig(ConfigKey.Advanced.UseResponsesApiTruncation);
|
||||
const responsesCompactionContextManagementEnabled = isResponsesCompactionContextManagementEnabled(this.endpoint, this.configurationService, this.expService);
|
||||
const summarizationEnabled = this.configurationService.getConfig(ConfigKey.SummarizeAgentConversationHistory) && this.prompt === AgentPrompt && !responsesCompactionContextManagementEnabled;
|
||||
const inlineSummarizationEnabled = summarizationEnabled && this.configurationService.getExperimentBasedConfig(ConfigKey.Advanced.AgentHistorySummarizationInline, this.expService);
|
||||
// Disable background compaction when inline summarization is active — they solve the same problem
|
||||
const backgroundCompactionEnabled = summarizationEnabled && !inlineSummarizationEnabled && this.configurationService.getExperimentBasedConfig(ConfigKey.BackgroundCompaction, this.expService);
|
||||
const useInlineSummarization = summarizationEnabled && this.configurationService.getExperimentBasedConfig(ConfigKey.Advanced.AgentHistorySummarizationInline, this.expService);
|
||||
|
||||
// When tools are present, apply a 10% safety margin on the message portion
|
||||
// to account for tokenizer discrepancies between our tool-token counter and
|
||||
@@ -432,7 +433,7 @@ export class AgentIntentInvocation extends EditCodeIntentInvocation implements I
|
||||
const safeBudget = useTruncation ? Number.MAX_SAFE_INTEGER : messageBudget;
|
||||
const endpoint = toolTokens > 0 ? this.endpoint.cloneWithTokenOverride(safeBudget) : this.endpoint;
|
||||
|
||||
this.logService.debug(`AgentIntent: rendering with budget=${safeBudget} (baseBudget: ${baseBudget}, toolTokens: ${toolTokens}, totalTools: ${tools?.length ?? 0}, toolSearchEnabled: ${toolSearchEnabled}), summarizationEnabled=${summarizationEnabled}`);
|
||||
this.logService.debug(`[Agent] rendering with budget=${safeBudget} (baseBudget: ${baseBudget}, toolTokens: ${toolTokens}, totalTools: ${tools?.length ?? 0}, toolSearchEnabled: ${toolSearchEnabled}), summarizationEnabled=${summarizationEnabled}`);
|
||||
let result: RenderPromptResult;
|
||||
const props: AgentPromptProps = {
|
||||
endpoint,
|
||||
@@ -449,94 +450,42 @@ export class AgentIntentInvocation extends EditCodeIntentInvocation implements I
|
||||
customizations: this._resolvedCustomizations
|
||||
};
|
||||
|
||||
// ── Background compaction: dual-threshold approach ────────────────
|
||||
// ── Background compaction ────────────────────────────────────────
|
||||
//
|
||||
// Background compaction thresholds (checked post-render using the
|
||||
// actual tokenCount from the current render):
|
||||
// Pre-render: if a previous bg pass completed, apply it now.
|
||||
//
|
||||
// Completed (previous bg pass) → apply the summary before rendering.
|
||||
// BudgetExceeded: if bg is InProgress/Completed, wait/apply.
|
||||
// Otherwise fall back to foreground summarization.
|
||||
//
|
||||
// ≥ 95% + InProgress → block on the background compaction
|
||||
// completing, then apply before rendering.
|
||||
// Post-render (≥ 80% + Idle): kick off background compaction
|
||||
// so it is ready for a future turn.
|
||||
//
|
||||
// ≥ 80% + Idle (post-render) → kick off background compaction so
|
||||
// it is ready for a future iteration.
|
||||
//
|
||||
const backgroundSummarizer = backgroundCompactionEnabled ? this._getOrCreateBackgroundSummarizer(promptContext.conversation?.sessionId) : undefined;
|
||||
const backgroundSummarizer = summarizationEnabled ? this._getOrCreateBackgroundSummarizer(promptContext.conversation?.sessionId) : undefined;
|
||||
const contextRatio = backgroundSummarizer && baseBudget > 0
|
||||
? (this._lastRenderTokenCount + toolTokens) / baseBudget
|
||||
: 0;
|
||||
|
||||
// ── Proactive inline summarization: pre-render check ──────────────
|
||||
// Use _lastRenderTokenCount (from the previous iteration) to decide
|
||||
// whether to append the summarize instruction *before* the main
|
||||
// render, avoiding a wasteful double-render.
|
||||
// Guard: skip when a summary was already stored on the current or
|
||||
// most-recent history turn — _lastRenderTokenCount is stale from the
|
||||
// summarization render and would falsely re-trigger.
|
||||
let proactiveInlineSummarization = false;
|
||||
if (inlineSummarizationEnabled && baseBudget > 0) {
|
||||
const hasRecentSummary = promptContext.toolCallRounds?.some(r => r.summary)
|
||||
|| promptContext.history.at(-1)?.rounds.some(r => r.summary);
|
||||
if (!hasRecentSummary) {
|
||||
const preRenderRatio = (this._lastRenderTokenCount + toolTokens) / baseBudget;
|
||||
if (preRenderRatio >= 0.85) {
|
||||
this.logService.debug(`[Agent] pre-render at ${(preRenderRatio * 100).toFixed(0)}% — proactively enabling inline summarization`);
|
||||
proactiveInlineSummarization = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Track whether we applied a summary in this iteration so we don't
|
||||
// immediately re-trigger background compaction in the post-render check.
|
||||
let summaryAppliedThisIteration = false;
|
||||
|
||||
// 1. If a previous background pass completed, apply its summary now.
|
||||
if (backgroundCompactionEnabled && backgroundSummarizer?.state === BackgroundSummarizationState.Completed) {
|
||||
// If a previous background pass completed, apply its summary now.
|
||||
if (summarizationEnabled && backgroundSummarizer?.state === BackgroundSummarizationState.Completed) {
|
||||
const bgResult = backgroundSummarizer.consumeAndReset();
|
||||
if (bgResult) {
|
||||
this.logService.debug(`[Agent] applying completed background summary (roundId=${bgResult.toolCallRoundId})`);
|
||||
this.logService.debug(`[ConversationHistorySummarizer] applying completed background summary (roundId=${bgResult.toolCallRoundId})`);
|
||||
progress.report(new ChatResponseProgressPart2(l10n.t('Compacted conversation'), async () => l10n.t('Compacted conversation')));
|
||||
this._applySummaryToRounds(bgResult, promptContext);
|
||||
this._persistSummaryOnTurn(bgResult, promptContext, this._lastRenderTokenCount);
|
||||
this._sendBackgroundCompactionTelemetry('preRender', 'applied', contextRatio, promptContext);
|
||||
summaryAppliedThisIteration = true;
|
||||
} else {
|
||||
this.logService.warn(`[Agent] background compaction state was Completed but consumeAndReset returned no result`);
|
||||
this.logService.warn(`[ConversationHistorySummarizer] background compaction state was Completed but consumeAndReset returned no result`);
|
||||
this._sendBackgroundCompactionTelemetry('preRender', 'noResult', contextRatio, promptContext);
|
||||
this._recordBackgroundCompactionFailure(promptContext, 'preRender');
|
||||
}
|
||||
}
|
||||
|
||||
// 2. At ≥ 95% — block and wait for the in-progress compaction,
|
||||
// then apply the result before rendering.
|
||||
if (backgroundCompactionEnabled && backgroundSummarizer && contextRatio >= 0.95 && backgroundSummarizer.state === BackgroundSummarizationState.InProgress) {
|
||||
this.logService.debug(`[Agent] context at ${(contextRatio * 100).toFixed(0)}% — blocking on background compaction`);
|
||||
const summaryPromise = backgroundSummarizer.waitForCompletion();
|
||||
progress.report(new ChatResponseProgressPart2(l10n.t('Compacting conversation...'), async () => {
|
||||
try { await summaryPromise; } catch { }
|
||||
return l10n.t('Compacted conversation');
|
||||
}));
|
||||
await summaryPromise;
|
||||
const bgResult = backgroundSummarizer.consumeAndReset();
|
||||
if (bgResult) {
|
||||
this.logService.debug(`[Agent] background compaction completed — applying result (roundId=${bgResult.toolCallRoundId})`);
|
||||
this._applySummaryToRounds(bgResult, promptContext);
|
||||
this._persistSummaryOnTurn(bgResult, promptContext, this._lastRenderTokenCount);
|
||||
this._sendBackgroundCompactionTelemetry('preRenderBlocked', 'applied', contextRatio, promptContext);
|
||||
summaryAppliedThisIteration = true;
|
||||
} else {
|
||||
this.logService.debug(`[Agent] background compaction finished but produced no usable result — will attempt foreground summarization if budget exceeded`);
|
||||
this._sendBackgroundCompactionTelemetry('preRenderBlocked', 'noResult', contextRatio, promptContext);
|
||||
this._recordBackgroundCompactionFailure(promptContext, 'preRenderBlocked');
|
||||
// Don't attempt a foreground fallback here — the main render below
|
||||
// will either succeed (context estimate was pessimistic) or throw
|
||||
// BudgetExceededError, which the catch block handles with foreground
|
||||
// summarization. Short-circuiting here would skip the main render
|
||||
// unnecessarily when it might still fit.
|
||||
}
|
||||
}
|
||||
|
||||
// Render the prompt without summarization or cache breakpoints, using
|
||||
// the original endpoint (not reduced for tools/safety buffer).
|
||||
const renderWithoutSummarization = async (reason: string, renderProps: AgentPromptProps = props): Promise<RenderPromptResult> => {
|
||||
@@ -567,7 +516,7 @@ export class AgentIntentInvocation extends EditCodeIntentInvocation implements I
|
||||
const turn = promptContext.conversation?.getLatestTurn();
|
||||
const previousForegroundSummary = turn?.getMetadata(SummarizedConversationHistoryMetadata);
|
||||
if (previousForegroundSummary?.source === 'foreground' && previousForegroundSummary.outcome && previousForegroundSummary.outcome !== 'success') {
|
||||
this.logService.debug(`[Agent] ${reason}, skipping repeated foreground summarization after prior failure (${previousForegroundSummary.outcome})`);
|
||||
this.logService.debug(`[ConversationHistorySummarizer] ${reason}, skipping repeated foreground summarization after prior failure (${previousForegroundSummary.outcome})`);
|
||||
/* __GDPR__
|
||||
"triggerSummarizeSkipped" : {
|
||||
"owner": "bhavyau",
|
||||
@@ -581,7 +530,7 @@ export class AgentIntentInvocation extends EditCodeIntentInvocation implements I
|
||||
return renderWithoutSummarization(`skipping repeated foreground summarization after prior failure (${previousForegroundSummary.outcome})`, renderProps);
|
||||
}
|
||||
|
||||
this.logService.debug(`[Agent] ${reason}, triggering summarization`);
|
||||
this.logService.debug(`[ConversationHistorySummarizer] ${reason}, triggering summarization`);
|
||||
try {
|
||||
const renderer = PromptRenderer.create(this.instantiationService, this.endpoint, this.prompt, {
|
||||
...renderProps,
|
||||
@@ -591,7 +540,7 @@ export class AgentIntentInvocation extends EditCodeIntentInvocation implements I
|
||||
});
|
||||
return await renderer.render(progress, token);
|
||||
} catch (e) {
|
||||
this.logService.error(e, `[Agent] summarization failed`);
|
||||
this.logService.error(e, `[ConversationHistorySummarizer] summarization failed`);
|
||||
const errorKind = e instanceof BudgetExceededError ? 'budgetExceeded' : 'error';
|
||||
/* __GDPR__
|
||||
"triggerSummarizeFailed" : {
|
||||
@@ -621,36 +570,10 @@ export class AgentIntentInvocation extends EditCodeIntentInvocation implements I
|
||||
}
|
||||
};
|
||||
|
||||
// Helper function for inline summarization — appends summarize instruction
|
||||
// as a user message in the agent loop instead of making a separate LLM call.
|
||||
// Returns the render result with InlineSummarizationRequestedMetadata set.
|
||||
const renderWithInlineSummarization = async (reason: string, renderProps: AgentPromptProps = props): Promise<RenderPromptResult> => {
|
||||
this.logService.debug(`[Agent] ${reason}, triggering inline summarization`);
|
||||
try {
|
||||
// Expand from the *base* endpoint (not renderProps.endpoint which may already be expanded)
|
||||
const expandedEndpoint = endpoint.cloneWithTokenOverride(endpoint.modelMaxPromptTokens * INLINE_SUMMARIZATION_BUDGET_EXPANSION);
|
||||
const renderer = PromptRenderer.create(this.instantiationService, expandedEndpoint, this.prompt, {
|
||||
...renderProps,
|
||||
endpoint: expandedEndpoint,
|
||||
inlineSummarization: true,
|
||||
});
|
||||
return await renderer.render(progress, token);
|
||||
} catch (e) {
|
||||
this.logService.error(e, `[Agent] inline summarization render failed, falling back to separate-call summarization`);
|
||||
return await renderWithSummarization(`inline summarization failed (${e instanceof Error ? e.message : e}), falling back`, renderProps);
|
||||
}
|
||||
};
|
||||
|
||||
const contextLengthBefore = this._lastRenderTokenCount;
|
||||
|
||||
try {
|
||||
const renderEndpoint = proactiveInlineSummarization
|
||||
? endpoint.cloneWithTokenOverride(endpoint.modelMaxPromptTokens * INLINE_SUMMARIZATION_BUDGET_EXPANSION)
|
||||
: endpoint;
|
||||
const renderProps: AgentPromptProps = proactiveInlineSummarization
|
||||
? { ...props, endpoint: renderEndpoint, inlineSummarization: true }
|
||||
: props;
|
||||
const renderer = PromptRenderer.create(this.instantiationService, renderEndpoint, this.prompt, renderProps);
|
||||
const renderer = PromptRenderer.create(this.instantiationService, endpoint, this.prompt, props);
|
||||
result = await renderer.render(progress, token);
|
||||
} catch (e) {
|
||||
if (e instanceof BudgetExceededError && summarizationEnabled) {
|
||||
@@ -670,7 +593,7 @@ export class AgentIntentInvocation extends EditCodeIntentInvocation implements I
|
||||
let budgetExceededTrigger: string;
|
||||
if (backgroundSummarizer.state === BackgroundSummarizationState.InProgress) {
|
||||
budgetExceededTrigger = 'budgetExceededWaited';
|
||||
this.logService.debug(`[Agent] budget exceeded — waiting on in-progress background compaction instead of new request`);
|
||||
this.logService.debug(`[ConversationHistorySummarizer] budget exceeded — waiting on in-progress background compaction instead of new request`);
|
||||
const summaryPromise = backgroundSummarizer.waitForCompletion();
|
||||
progress.report(new ChatResponseProgressPart2(l10n.t('Compacting conversation...'), async () => {
|
||||
try { await summaryPromise; } catch { }
|
||||
@@ -679,12 +602,12 @@ export class AgentIntentInvocation extends EditCodeIntentInvocation implements I
|
||||
await summaryPromise;
|
||||
} else {
|
||||
budgetExceededTrigger = 'budgetExceededReady';
|
||||
this.logService.debug(`[Agent] budget exceeded — applying already-completed background compaction`);
|
||||
this.logService.debug(`[ConversationHistorySummarizer] budget exceeded — applying already-completed background compaction`);
|
||||
progress.report(new ChatResponseProgressPart2(l10n.t('Compacted conversation'), async () => l10n.t('Compacted conversation')));
|
||||
}
|
||||
const bgResult = backgroundSummarizer.consumeAndReset();
|
||||
if (bgResult) {
|
||||
this.logService.debug(`[Agent] background compaction applied after budget exceeded (roundId=${bgResult.toolCallRoundId})`);
|
||||
this.logService.debug(`[ConversationHistorySummarizer] background compaction applied after budget exceeded (roundId=${bgResult.toolCallRoundId})`);
|
||||
this._applySummaryToRounds(bgResult, promptContext);
|
||||
this._persistSummaryOnTurn(bgResult, promptContext, contextLengthBefore);
|
||||
this._sendBackgroundCompactionTelemetry(budgetExceededTrigger, 'applied', contextRatio, promptContext);
|
||||
@@ -693,14 +616,12 @@ export class AgentIntentInvocation extends EditCodeIntentInvocation implements I
|
||||
const renderer = PromptRenderer.create(this.instantiationService, endpoint, this.prompt, { ...props, promptContext });
|
||||
result = await renderer.render(progress, token);
|
||||
} else {
|
||||
this.logService.debug(`[Agent] background compaction produced no usable result after budget exceeded — falling back to synchronous summarization`);
|
||||
this.logService.debug(`[ConversationHistorySummarizer] background compaction produced no usable result after budget exceeded — falling back to synchronous summarization`);
|
||||
this._sendBackgroundCompactionTelemetry(budgetExceededTrigger, 'noResult', contextRatio, promptContext);
|
||||
this._recordBackgroundCompactionFailure(promptContext, budgetExceededTrigger);
|
||||
// Background compaction failed — fall back to synchronous summarization
|
||||
result = await renderWithSummarization(`budget exceeded(${e.message}), background compaction failed`);
|
||||
}
|
||||
} else if (inlineSummarizationEnabled) {
|
||||
result = await renderWithInlineSummarization(`budget exceeded(${e.message})`);
|
||||
} else {
|
||||
result = await renderWithSummarization(`budget exceeded(${e.message})`);
|
||||
}
|
||||
@@ -734,47 +655,27 @@ export class AgentIntentInvocation extends EditCodeIntentInvocation implements I
|
||||
));
|
||||
}
|
||||
|
||||
// 3. Post-render background compaction checks.
|
||||
if (backgroundCompactionEnabled && backgroundSummarizer && !summaryAppliedThisIteration) {
|
||||
// Post-render: kick off background compaction at ≥ 80% if idle.
|
||||
if (summarizationEnabled && backgroundSummarizer && !summaryAppliedThisIteration) {
|
||||
const postRenderRatio = baseBudget > 0
|
||||
? (result.tokenCount + toolTokens) / baseBudget
|
||||
: 0;
|
||||
|
||||
if (postRenderRatio >= 0.95 && backgroundSummarizer.state === BackgroundSummarizationState.InProgress) {
|
||||
// At ≥ 95% with a background compaction already running — block,
|
||||
// wait for it, apply the result, and re-render so the LLM gets
|
||||
// the compacted prompt instead of the oversized one.
|
||||
this.logService.debug(`[Agent] post-render at ${(postRenderRatio * 100).toFixed(0)}% — blocking on in-progress background compaction`);
|
||||
const summaryPromise = backgroundSummarizer.waitForCompletion();
|
||||
progress.report(new ChatResponseProgressPart2(l10n.t('Compacting conversation...'), async () => {
|
||||
try { await summaryPromise; } catch { }
|
||||
return l10n.t('Compacted conversation');
|
||||
}));
|
||||
await summaryPromise;
|
||||
const bgResult = backgroundSummarizer.consumeAndReset();
|
||||
if (bgResult) {
|
||||
this.logService.debug(`[Agent] post-render background compaction completed — applying result and re-rendering (roundId=${bgResult.toolCallRoundId})`);
|
||||
this._applySummaryToRounds(bgResult, promptContext);
|
||||
this._persistSummaryOnTurn(bgResult, promptContext, result.tokenCount);
|
||||
this._sendBackgroundCompactionTelemetry('postRenderBlocked', 'applied', postRenderRatio, promptContext);
|
||||
// Re-render with compacted history so the LLM receives the smaller prompt
|
||||
const reRenderer = PromptRenderer.create(this.instantiationService, endpoint, this.prompt, { ...props, promptContext });
|
||||
result = await reRenderer.render(progress, token);
|
||||
this._lastRenderTokenCount = result.tokenCount;
|
||||
} else {
|
||||
this.logService.debug(`[Agent] post-render background compaction finished but produced no usable result — falling back to foreground summarization`);
|
||||
this._sendBackgroundCompactionTelemetry('postRenderBlocked', 'noResult', postRenderRatio, promptContext);
|
||||
this._recordBackgroundCompactionFailure(promptContext, 'postRenderBlocked');
|
||||
try {
|
||||
result = await renderWithSummarization('post-render background compaction noResult fallback');
|
||||
this._lastRenderTokenCount = result.tokenCount;
|
||||
} catch (e) {
|
||||
this.logService.error(e, `[Agent] post-render foreground summarization fallback also failed — using original render result`);
|
||||
}
|
||||
if (postRenderRatio >= 0.80 && (backgroundSummarizer.state === BackgroundSummarizationState.Idle || backgroundSummarizer.state === BackgroundSummarizationState.Failed)) {
|
||||
if (useInlineSummarization) {
|
||||
// Compute and cache model capabilities from the current render's
|
||||
// messages. These must match the main agent fetch for cache parity.
|
||||
const strippedMessages = ToolCallingLoop.stripInternalToolCallIds(result.messages);
|
||||
const rawEffort = this.request.modelConfiguration?.reasoningEffort;
|
||||
const isSubagent = !!this.request.subAgentInvocationId;
|
||||
this._lastModelCapabilities = {
|
||||
enableThinking: !isAnthropicFamily(this.endpoint) || ToolCallingLoop.messagesContainThinking(strippedMessages),
|
||||
reasoningEffort: typeof rawEffort === 'string' ? rawEffort : undefined,
|
||||
enableToolSearch: !isSubagent && isAnthropicToolSearchEnabled(this.endpoint, this.configurationService),
|
||||
enableContextEditing: !isSubagent && isAnthropicContextEditingEnabled(this.endpoint, this.configurationService, this.expService),
|
||||
};
|
||||
}
|
||||
} else if (postRenderRatio >= 0.80 && (backgroundSummarizer.state === BackgroundSummarizationState.Idle || backgroundSummarizer.state === BackgroundSummarizationState.Failed)) {
|
||||
// At ≥ 80% with no running compaction (or a previous failure) — kick off background work.
|
||||
this._startBackgroundSummarization(backgroundSummarizer, props, token, postRenderRatio);
|
||||
this._startBackgroundSummarization(backgroundSummarizer, result.messages, promptContext, props, token, postRenderRatio, useInlineSummarization);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -841,52 +742,239 @@ export class AgentIntentInvocation extends EditCodeIntentInvocation implements I
|
||||
|
||||
private _startBackgroundSummarization(
|
||||
backgroundSummarizer: BackgroundSummarizer,
|
||||
mainRenderMessages: Raw.ChatMessage[],
|
||||
promptContext: IBuildPromptContext,
|
||||
props: AgentPromptProps,
|
||||
token: vscode.CancellationToken,
|
||||
contextRatio: number,
|
||||
useInlineSummarization: boolean,
|
||||
): void {
|
||||
this.logService.debug(`[Agent] context at ${(contextRatio * 100).toFixed(0)}% — starting background compaction`);
|
||||
// Deep-copy toolCallRounds and toolCallResults so the background render
|
||||
// sees a frozen snapshot and doesn't drift as the main loop adds rounds.
|
||||
const snapshotProps: AgentPromptProps = {
|
||||
...props,
|
||||
promptContext: {
|
||||
...props.promptContext,
|
||||
toolCallRounds: props.promptContext.toolCallRounds ? [...props.promptContext.toolCallRounds] : undefined,
|
||||
toolCallResults: props.promptContext.toolCallResults ? { ...props.promptContext.toolCallResults } : undefined,
|
||||
}
|
||||
};
|
||||
const bgRenderer = PromptRenderer.create(this.instantiationService, this.endpoint, this.prompt, {
|
||||
...snapshotProps,
|
||||
endpoint: this.endpoint,
|
||||
promptContext: snapshotProps.promptContext,
|
||||
triggerSummarize: true,
|
||||
summarizationSource: 'background',
|
||||
});
|
||||
const bgProgress: vscode.Progress<vscode.ChatResponseReferencePart | vscode.ChatResponseProgressPart> = { report: () => { } };
|
||||
this.logService.debug(`[ConversationHistorySummarizer] context at ${(contextRatio * 100).toFixed(0)}% — starting background compaction (inline=${useInlineSummarization})`);
|
||||
|
||||
const bgStartTime = Date.now();
|
||||
|
||||
// Snapshot rounds so telemetry reflects state at kick-off time, not at
|
||||
// completion time (the main loop mutates toolCallRounds). History is
|
||||
// stable across a single user turn so a reference is sufficient.
|
||||
const rounds = [...(promptContext.toolCallRounds ?? [])];
|
||||
const history = promptContext.history;
|
||||
let toolCallRoundId: string | undefined;
|
||||
if (rounds.length >= 2) {
|
||||
// Mark the round before the last, preserving the last round verbatim
|
||||
toolCallRoundId = rounds[rounds.length - 2].id;
|
||||
} else if (rounds.length === 1) {
|
||||
toolCallRoundId = rounds[0].id;
|
||||
} else {
|
||||
for (let i = history.length - 1; i >= 0 && !toolCallRoundId; i--) {
|
||||
const lastRound = history[i].rounds.at(-1);
|
||||
if (lastRound) {
|
||||
toolCallRoundId = lastRound.id;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Build tool schemas matching the main agent loop so the prompt
|
||||
// prefix (system + tools + messages) is identical for cache hits.
|
||||
const availableTools = promptContext.tools?.availableTools;
|
||||
const normalizedTools = availableTools?.length ? normalizeToolSchema(
|
||||
this.endpoint.family,
|
||||
availableTools.map(tool => ({
|
||||
function: {
|
||||
name: tool.name,
|
||||
description: tool.description,
|
||||
parameters: tool.inputSchema && Object.keys(tool.inputSchema).length ? tool.inputSchema : undefined
|
||||
},
|
||||
type: 'function' as const,
|
||||
})),
|
||||
(tool, rule) => {
|
||||
this.logService.warn(`[ConversationHistorySummarizer] Tool ${tool} failed validation: ${rule}`);
|
||||
},
|
||||
) : undefined;
|
||||
const toolOpts = normalizedTools?.length ? {
|
||||
tools: normalizedTools,
|
||||
} : undefined;
|
||||
|
||||
const associatedRequestId = promptContext.conversation?.getLatestTurn()?.id;
|
||||
const conversationId = promptContext.conversation?.sessionId;
|
||||
const modelCapabilities = this._lastModelCapabilities;
|
||||
|
||||
backgroundSummarizer.start(async bgToken => {
|
||||
try {
|
||||
const bgRenderResult = await bgRenderer.render(bgProgress, bgToken);
|
||||
const summaryMetadata = bgRenderResult.metadata.get(SummarizedConversationHistoryMetadata);
|
||||
if (!summaryMetadata) {
|
||||
throw new Error('Background compaction produced no summary metadata');
|
||||
if (useInlineSummarization) {
|
||||
// Inline mode: fork the exact messages from the main render
|
||||
// and append a summary user message. The prompt prefix is
|
||||
// byte-identical to the main agent loop for cache hits.
|
||||
const strippedMainMessages = ToolCallingLoop.stripInternalToolCallIds(mainRenderMessages);
|
||||
const summaryMsgResult = await renderPromptElement(
|
||||
this.instantiationService,
|
||||
this.endpoint,
|
||||
InlineSummarizationUserMessage,
|
||||
{ endpoint: this.endpoint },
|
||||
undefined,
|
||||
bgToken,
|
||||
);
|
||||
const messages = [
|
||||
...strippedMainMessages,
|
||||
...summaryMsgResult.messages,
|
||||
];
|
||||
|
||||
const response = await this.endpoint.makeChatRequest2({
|
||||
debugName: 'summarizeConversationHistory-inline',
|
||||
messages,
|
||||
finishedCb: undefined,
|
||||
location: ChatLocation.Agent,
|
||||
conversationId,
|
||||
requestOptions: {
|
||||
temperature: 0,
|
||||
stream: false,
|
||||
...toolOpts,
|
||||
},
|
||||
modelCapabilities,
|
||||
telemetryProperties: associatedRequestId ? { associatedRequestId } : undefined,
|
||||
enableRetryOnFilter: true,
|
||||
}, bgToken);
|
||||
if (response.type !== ChatFetchResponseType.Success) {
|
||||
throw new Error(`Background inline summarization request failed: ${response.type}`);
|
||||
}
|
||||
const summaryText = extractInlineSummary(response.value);
|
||||
if (!summaryText) {
|
||||
throw new Error('Background inline summarization: no <summary> tags found in response');
|
||||
}
|
||||
if (!toolCallRoundId) {
|
||||
throw new Error('Background inline summarization: no round ID to apply summary to');
|
||||
}
|
||||
this.logService.debug(`[ConversationHistorySummarizer] background inline compaction completed (${summaryText.length} chars, roundId=${toolCallRoundId})`);
|
||||
|
||||
// Send summarizedConversationHistory telemetry for parity
|
||||
// with the standard ConversationHistorySummarizer path.
|
||||
const numRoundsInHistory = history.reduce((sum, t) => sum + t.rounds.length, 0);
|
||||
const numRoundsInCurrentTurn = rounds.length;
|
||||
const lastUsedTool = rounds.at(-1)?.toolCalls?.at(-1)?.name
|
||||
?? history.at(-1)?.rounds.at(-1)?.toolCalls?.at(-1)?.name ?? 'none';
|
||||
/* __GDPR__
|
||||
"summarizedConversationHistory" : {
|
||||
"owner": "bhavyau",
|
||||
"comment": "Tracks background inline summarization outcome",
|
||||
"outcome": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The success state." },
|
||||
"model": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The model ID." },
|
||||
"summarizationMode": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The summarization mode." },
|
||||
"source": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Whether background or foreground." },
|
||||
"conversationId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Session id." },
|
||||
"chatRequestId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The chat request ID." },
|
||||
"lastUsedTool": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The last tool used before summarization." },
|
||||
"requestId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The request ID from the summarization call." },
|
||||
"numRounds": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Total tool call rounds." },
|
||||
"turnIndex": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "The index of the current turn." },
|
||||
"curTurnRoundIndex": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "The index of the current round within the current turn." },
|
||||
"isDuringToolCalling": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Whether this was triggered during tool calling." },
|
||||
"duration": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Duration in ms." },
|
||||
"promptTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "isMeasurement": true, "comment": "Prompt tokens." },
|
||||
"promptCacheTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "isMeasurement": true, "comment": "Cached prompt tokens." },
|
||||
"responseTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "isMeasurement": true, "comment": "Output tokens." }
|
||||
}
|
||||
*/
|
||||
this.telemetryService.sendMSFTTelemetryEvent('summarizedConversationHistory', {
|
||||
outcome: 'success',
|
||||
model: this.endpoint.model,
|
||||
summarizationMode: 'inline',
|
||||
source: 'background',
|
||||
conversationId,
|
||||
chatRequestId: associatedRequestId,
|
||||
lastUsedTool,
|
||||
requestId: response.requestId,
|
||||
}, {
|
||||
numRounds: numRoundsInHistory + numRoundsInCurrentTurn,
|
||||
turnIndex: history.length,
|
||||
curTurnRoundIndex: numRoundsInCurrentTurn,
|
||||
isDuringToolCalling: numRoundsInCurrentTurn > 0 ? 1 : 0,
|
||||
duration: Date.now() - bgStartTime,
|
||||
promptTokenCount: response.usage?.prompt_tokens,
|
||||
promptCacheTokenCount: response.usage?.prompt_tokens_details?.cached_tokens,
|
||||
responseTokenCount: response.usage?.completion_tokens,
|
||||
});
|
||||
|
||||
return {
|
||||
summary: summaryText,
|
||||
toolCallRoundId,
|
||||
promptTokens: response.usage?.prompt_tokens,
|
||||
promptCacheTokens: response.usage?.prompt_tokens_details?.cached_tokens,
|
||||
outputTokens: response.usage?.completion_tokens,
|
||||
durationMs: Date.now() - bgStartTime,
|
||||
model: this.endpoint.model,
|
||||
summarizationMode: 'inline',
|
||||
numRounds: undefined,
|
||||
numRoundsSinceLastSummarization: undefined,
|
||||
};
|
||||
} else {
|
||||
// Standard mode: use triggerSummarize which makes a separate
|
||||
// LLM call with a summarization-specific prompt during render.
|
||||
const snapshotProps: AgentPromptProps = {
|
||||
...props,
|
||||
promptContext: {
|
||||
...promptContext,
|
||||
toolCallRounds: promptContext.toolCallRounds ? [...promptContext.toolCallRounds] : undefined,
|
||||
toolCallResults: promptContext.toolCallResults ? { ...promptContext.toolCallResults } : undefined,
|
||||
}
|
||||
};
|
||||
const bgRenderer = PromptRenderer.create(this.instantiationService, this.endpoint, this.prompt, {
|
||||
...snapshotProps,
|
||||
endpoint: this.endpoint,
|
||||
promptContext: snapshotProps.promptContext,
|
||||
triggerSummarize: true,
|
||||
summarizationSource: 'background',
|
||||
});
|
||||
const bgProgress: vscode.Progress<vscode.ChatResponseReferencePart | vscode.ChatResponseProgressPart> = { report: () => { } };
|
||||
const bgRenderResult = await bgRenderer.render(bgProgress, bgToken);
|
||||
const summaryMetadata = bgRenderResult.metadata.get(SummarizedConversationHistoryMetadata);
|
||||
if (!summaryMetadata) {
|
||||
throw new Error('Background compaction produced no summary metadata');
|
||||
}
|
||||
this.logService.debug(`[ConversationHistorySummarizer] background compaction completed successfully (roundId=${summaryMetadata.toolCallRoundId})`);
|
||||
return {
|
||||
summary: summaryMetadata.text,
|
||||
toolCallRoundId: summaryMetadata.toolCallRoundId,
|
||||
promptTokens: summaryMetadata.usage?.prompt_tokens,
|
||||
promptCacheTokens: summaryMetadata.usage?.prompt_tokens_details?.cached_tokens,
|
||||
outputTokens: summaryMetadata.usage?.completion_tokens,
|
||||
durationMs: Date.now() - bgStartTime,
|
||||
model: summaryMetadata.model,
|
||||
summarizationMode: summaryMetadata.summarizationMode,
|
||||
numRounds: summaryMetadata.numRounds,
|
||||
numRoundsSinceLastSummarization: summaryMetadata.numRoundsSinceLastSummarization,
|
||||
};
|
||||
}
|
||||
this.logService.debug(`[Agent] background compaction completed successfully (roundId=${summaryMetadata.toolCallRoundId})`);
|
||||
return {
|
||||
summary: summaryMetadata.text,
|
||||
toolCallRoundId: summaryMetadata.toolCallRoundId,
|
||||
promptTokens: summaryMetadata.usage?.prompt_tokens,
|
||||
promptCacheTokens: summaryMetadata.usage?.prompt_tokens_details?.cached_tokens,
|
||||
outputTokens: summaryMetadata.usage?.completion_tokens,
|
||||
durationMs: Date.now() - bgStartTime,
|
||||
model: summaryMetadata.model,
|
||||
summarizationMode: summaryMetadata.summarizationMode,
|
||||
numRounds: summaryMetadata.numRounds,
|
||||
numRoundsSinceLastSummarization: summaryMetadata.numRoundsSinceLastSummarization,
|
||||
};
|
||||
} catch (err) {
|
||||
this.logService.error(err, `[Agent] background compaction failed`);
|
||||
this.logService.error(err, `[ConversationHistorySummarizer] background compaction failed`);
|
||||
|
||||
// Send failure telemetry for inline background summarization
|
||||
if (useInlineSummarization) {
|
||||
/* __GDPR__
|
||||
"summarizedConversationHistory" : {
|
||||
"owner": "bhavyau",
|
||||
"comment": "Tracks background inline summarization failure",
|
||||
"outcome": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The success state." },
|
||||
"detailedOutcome": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Detailed failure reason." },
|
||||
"model": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The model ID." },
|
||||
"summarizationMode": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The summarization mode." },
|
||||
"source": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Whether background or foreground." },
|
||||
"conversationId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Session id." },
|
||||
"chatRequestId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The chat request ID." },
|
||||
"duration": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Duration in ms." }
|
||||
}
|
||||
*/
|
||||
this.telemetryService.sendMSFTTelemetryEvent('summarizedConversationHistory', {
|
||||
outcome: 'failed',
|
||||
detailedOutcome: err instanceof Error ? err.message : String(err),
|
||||
model: this.endpoint.model,
|
||||
summarizationMode: 'inline',
|
||||
source: 'background',
|
||||
conversationId,
|
||||
chatRequestId: associatedRequestId,
|
||||
}, {
|
||||
duration: Date.now() - bgStartTime,
|
||||
});
|
||||
}
|
||||
|
||||
throw err;
|
||||
}
|
||||
}, token);
|
||||
@@ -924,7 +1012,7 @@ export class AgentIntentInvocation extends EditCodeIntentInvocation implements I
|
||||
}
|
||||
}
|
||||
if (!found) {
|
||||
this.logService.warn(`[Agent] background compaction round ${bgResult.toolCallRoundId} not found in toolCallRounds or history — summary dropped`);
|
||||
this.logService.warn(`[ConversationHistorySummarizer] background compaction round ${bgResult.toolCallRoundId} not found in toolCallRounds or history — summary dropped`);
|
||||
}
|
||||
}
|
||||
// Invalidate the auto mode router cache so the next getChatEndpoint()
|
||||
|
||||
@@ -48,7 +48,7 @@ import { ThinkingDataItem, ToolCallRound } from '../../prompt/common/toolCallRou
|
||||
import { IBuildPromptResult, IResponseProcessor } from '../../prompt/node/intents';
|
||||
import { PseudoStopStartResponseProcessor } from '../../prompt/node/pseudoStartStopConversationCallback';
|
||||
import { ResponseProcessorContext } from '../../prompt/node/responseProcessorContext';
|
||||
import { extractInlineSummary, InlineSummarizationRequestedMetadata, SummarizedConversationHistoryMetadata } from '../../prompts/node/agent/summarizedConversationHistory';
|
||||
import { SummarizedConversationHistoryMetadata } from '../../prompts/node/agent/summarizedConversationHistory';
|
||||
import { ToolFailureEncountered, ToolResultMetadata } from '../../prompts/node/panel/toolCalling';
|
||||
import { ToolName } from '../../tools/common/toolNames';
|
||||
import { IToolsService, ToolCallCancelledError } from '../../tools/common/toolsService';
|
||||
@@ -355,9 +355,6 @@ export abstract class ToolCallingLoop<TOptions extends IToolCallingLoopOptions =
|
||||
private taskCompleted = false;
|
||||
private autopilotStopHookActive = false;
|
||||
private autopilotProgressDeferred: DeferredPromise<void> | undefined;
|
||||
private inlineSummarizationProgressDeferred: DeferredPromise<void> | undefined;
|
||||
/** Set to true before calling fetch() when the current iteration is an inline summarization request. */
|
||||
protected _isInlineSummarizationRequest = false;
|
||||
|
||||
/**
|
||||
* Autopilot stop hook — the model needs to call `task_complete` to signal it's done.
|
||||
@@ -913,145 +910,6 @@ export abstract class ToolCallingLoop<TOptions extends IToolCallingLoopOptions =
|
||||
this._sessionTranscriptService.logAssistantTurnEnd(sessionId, turnId);
|
||||
agentSpan?.addEvent('turn_end', { turnId, ...(chatSessionId ? { [CopilotChatAttr.CHAT_SESSION_ID]: chatSessionId } : {}) });
|
||||
|
||||
// Inline summarization: the model responded with summary text only (no tool calls).
|
||||
// Extract the summary, store it on the appropriate round, and continue the loop.
|
||||
if (result.inlineSummarizationRequested && !result.round.toolCalls.length) {
|
||||
if (result.response.type !== ChatFetchResponseType.Success) {
|
||||
this.inlineSummarizationProgressDeferred?.complete(undefined);
|
||||
this.inlineSummarizationProgressDeferred = undefined;
|
||||
} else {
|
||||
const summaryText = extractInlineSummary(result.round.response);
|
||||
if (summaryText !== undefined) {
|
||||
const summarizedRound = this.applySummaryToRound(summaryText);
|
||||
|
||||
if (summarizedRound) {
|
||||
// Persist summary on the turn so normalizeSummariesOnRounds can restore it
|
||||
const turn = this.turn;
|
||||
const resolvedModel = result.response.resolvedModel;
|
||||
const usage = result.response.usage;
|
||||
turn.addPendingSummary(summarizedRound, summaryText);
|
||||
|
||||
const history = this.options.conversation.turns.slice(0, -1);
|
||||
// Exclude the summarization round from telemetry counts for parity with separate-call summarization
|
||||
const toolCallRoundsForTelemetry = this.toolCallRounds.slice(0, -1);
|
||||
const numRoundsInHistory = history.reduce((sum, t) => sum + t.rounds.length, 0);
|
||||
const numRoundsInCurrentTurn = toolCallRoundsForTelemetry.length;
|
||||
const numRounds = numRoundsInHistory + numRoundsInCurrentTurn;
|
||||
const lastUsedTool = toolCallRoundsForTelemetry.at(-1)?.toolCalls.at(-1)?.name
|
||||
?? history.at(-1)?.rounds.at(-1)?.toolCalls.at(-1)?.name ?? 'none';
|
||||
|
||||
// Compute rounds since last summarization (same logic as ConversationHistorySummarizer)
|
||||
let numRoundsSinceLastSummarization = -1;
|
||||
for (let ri = toolCallRoundsForTelemetry.length - 1; ri >= 0; ri--) {
|
||||
if (toolCallRoundsForTelemetry[ri].summary) {
|
||||
numRoundsSinceLastSummarization = toolCallRoundsForTelemetry.length - 1 - ri;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (numRoundsSinceLastSummarization === -1) {
|
||||
let count = numRoundsInCurrentTurn;
|
||||
outerLoop: for (let ti = history.length - 1; ti >= 0; ti--) {
|
||||
for (let ri = history[ti].rounds.length - 1; ri >= 0; ri--) {
|
||||
if (history[ti].rounds[ri].summary) {
|
||||
numRoundsSinceLastSummarization = count;
|
||||
break outerLoop;
|
||||
}
|
||||
count++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const inlineSummarizationMeta = new SummarizedConversationHistoryMetadata(
|
||||
summarizedRound,
|
||||
summaryText,
|
||||
{
|
||||
usage,
|
||||
model: resolvedModel,
|
||||
summarizationMode: 'inline',
|
||||
numRounds,
|
||||
numRoundsSinceLastSummarization,
|
||||
source: 'foreground',
|
||||
outcome: 'success',
|
||||
},
|
||||
);
|
||||
turn.setMetadata(inlineSummarizationMeta);
|
||||
|
||||
// Fire telemetry matching the existing summarizedConversationHistory event
|
||||
/* __GDPR__
|
||||
"summarizedConversationHistory" : {
|
||||
"owner": "bhavyau",
|
||||
"comment": "Tracks inline summarization",
|
||||
"outcome": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The success state." },
|
||||
"model": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The model ID." },
|
||||
"summarizationMode": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The summarization mode." },
|
||||
"source": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Whether background or foreground." },
|
||||
"conversationId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Session id." },
|
||||
"chatRequestId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The chat request ID." },
|
||||
"lastUsedTool": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The last tool used before summarization." },
|
||||
"requestId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The request ID from the summarization call." },
|
||||
"numRounds": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Total tool call rounds." },
|
||||
"numRoundsSinceLastSummarization": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Rounds since last summarization." },
|
||||
"turnIndex": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "The index of the current turn." },
|
||||
"curTurnRoundIndex": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "The index of the current round within the current turn." },
|
||||
"isDuringToolCalling": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Whether this was triggered during tool calling." },
|
||||
"promptTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "isMeasurement": true, "comment": "Prompt tokens." },
|
||||
"promptCacheTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "isMeasurement": true, "comment": "Cached prompt tokens." },
|
||||
"responseTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "isMeasurement": true, "comment": "Output tokens." }
|
||||
}
|
||||
*/
|
||||
this._telemetryService.sendMSFTTelemetryEvent('summarizedConversationHistory', {
|
||||
outcome: 'success',
|
||||
model: resolvedModel,
|
||||
summarizationMode: 'inline',
|
||||
source: 'foreground',
|
||||
conversationId: this.options.conversation.sessionId,
|
||||
chatRequestId: turn.id,
|
||||
lastUsedTool,
|
||||
requestId: result.response.requestId,
|
||||
}, {
|
||||
numRounds,
|
||||
numRoundsSinceLastSummarization,
|
||||
turnIndex: history.length,
|
||||
curTurnRoundIndex: numRoundsInCurrentTurn,
|
||||
isDuringToolCalling: numRoundsInCurrentTurn > 0 ? 1 : 0,
|
||||
promptTokenCount: usage?.prompt_tokens,
|
||||
promptCacheTokenCount: usage?.prompt_tokens_details?.cached_tokens,
|
||||
responseTokenCount: usage?.completion_tokens,
|
||||
});
|
||||
GenAiMetrics.incrementAgentSummarizationCount(this._otelService, 'success');
|
||||
|
||||
this._logService.info(`[ToolCallingLoop] Inline summarization extracted (${summaryText.length} chars, roundId=${summarizedRound}), continuing loop`);
|
||||
|
||||
// Remove the summarization round — it served its purpose
|
||||
// and shouldn't be rendered as an assistant message in
|
||||
// subsequent iterations (otherwise the model sees both
|
||||
// the compacted <conversation-summary> AND the raw
|
||||
// <analysis>...<summary>...</summary> response).
|
||||
this.toolCallRounds.pop();
|
||||
|
||||
// Resolve the "Compacting conversation..." progress to show "Compacted conversation"
|
||||
this.inlineSummarizationProgressDeferred?.complete(undefined);
|
||||
this.inlineSummarizationProgressDeferred = undefined;
|
||||
continue;
|
||||
} else {
|
||||
this._logService.warn(`[ToolCallingLoop] Inline summarization: no round found to store summary on`);
|
||||
this._sendInlineSummarizationFailureTelemetry('noRoundFound', result.response);
|
||||
GenAiMetrics.incrementAgentSummarizationCount(this._otelService, 'failed');
|
||||
this.inlineSummarizationProgressDeferred?.complete(undefined);
|
||||
this.inlineSummarizationProgressDeferred = undefined;
|
||||
// Fall through to normal no-tool-calls handling (will break the loop)
|
||||
}
|
||||
} else {
|
||||
this._logService.warn(`[ToolCallingLoop] Inline summarization requested but no summary extracted from response`);
|
||||
this._sendInlineSummarizationFailureTelemetry('extractionFailed', result.response);
|
||||
GenAiMetrics.incrementAgentSummarizationCount(this._otelService, 'failed');
|
||||
this.inlineSummarizationProgressDeferred?.complete(undefined);
|
||||
this.inlineSummarizationProgressDeferred = undefined;
|
||||
// Fall through to normal no-tool-calls handling (will break the loop)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If the model produced productive (non-task_complete) tool calls after being nudged,
|
||||
// reset the stop hook flag and iteration count so it can be nudged again.
|
||||
if (this.autopilotStopHookActive && result.round.toolCalls.length && !result.round.toolCalls.some(tc => tc.name === ToolCallingLoop.TASK_COMPLETE_TOOL_NAME)) {
|
||||
@@ -1133,8 +991,6 @@ export abstract class ToolCallingLoop<TOptions extends IToolCallingLoopOptions =
|
||||
break;
|
||||
}
|
||||
} catch (e) {
|
||||
this.inlineSummarizationProgressDeferred?.complete(undefined);
|
||||
this.inlineSummarizationProgressDeferred = undefined;
|
||||
if (isCancellationError(e) && lastResult) {
|
||||
break;
|
||||
}
|
||||
@@ -1332,19 +1188,6 @@ export abstract class ToolCallingLoop<TOptions extends IToolCallingLoopOptions =
|
||||
if (conversationSummary) {
|
||||
this.turn.setMetadata(conversationSummary);
|
||||
}
|
||||
const inlineSummarizationRequested = !!effectiveBuildPromptResult.metadata.get(InlineSummarizationRequestedMetadata);
|
||||
|
||||
// Show "Compacting conversation..." progress during the inline summarization
|
||||
// fetch. The deferred is resolved in _runLoop after the summary is extracted.
|
||||
if (inlineSummarizationRequested) {
|
||||
this.inlineSummarizationProgressDeferred?.complete(undefined);
|
||||
const deferred = new DeferredPromise<void>();
|
||||
this.inlineSummarizationProgressDeferred = deferred;
|
||||
outputStream?.progress(l10n.t('Compacting conversation...'), async () => {
|
||||
await deferred.p;
|
||||
return l10n.t('Compacted conversation');
|
||||
});
|
||||
}
|
||||
|
||||
const endpoint = await this._endpointProvider.getChatEndpoint(this.options.request);
|
||||
const tokenizer = endpoint.acquireTokenizer();
|
||||
@@ -1381,14 +1224,11 @@ export abstract class ToolCallingLoop<TOptions extends IToolCallingLoopOptions =
|
||||
|
||||
this._logService.trace('Sending prompt to model');
|
||||
|
||||
// When inline summarization is requested, suppress streaming so the
|
||||
// summary text (with <summary> tags) is not shown to the user.
|
||||
const effectiveOutputStream = inlineSummarizationRequested ? undefined : outputStream;
|
||||
const streamParticipants = effectiveOutputStream ? [effectiveOutputStream] : [];
|
||||
const streamParticipants = outputStream ? [outputStream] : [];
|
||||
let fetchStreamSource: FetchStreamSource | undefined;
|
||||
let processResponsePromise: Promise<ChatResult | void> | undefined;
|
||||
let stopEarly = false;
|
||||
if (effectiveOutputStream) {
|
||||
if (outputStream) {
|
||||
this.options.streamParticipants?.forEach(fn => {
|
||||
streamParticipants.push(fn(streamParticipants[streamParticipants.length - 1]));
|
||||
});
|
||||
@@ -1428,7 +1268,6 @@ export abstract class ToolCallingLoop<TOptions extends IToolCallingLoopOptions =
|
||||
const enableThinking = !shouldDisableThinking;
|
||||
let phase: string | undefined;
|
||||
let compaction: OpenAIContextManagementResponse | undefined;
|
||||
this._isInlineSummarizationRequest = inlineSummarizationRequested;
|
||||
markChatExt(this.options.conversation.sessionId, ChatExtPerfMark.WillFetch);
|
||||
const fetchResult = await this.fetch({
|
||||
messages: this.applyMessagePostProcessing(effectiveBuildPromptResult.messages, { stripOrphanedToolCalls: isGeminiFamily(endpoint) }),
|
||||
@@ -1559,7 +1398,6 @@ export abstract class ToolCallingLoop<TOptions extends IToolCallingLoopOptions =
|
||||
hadIgnoredFiles: buildPromptResult.hasIgnoredFiles,
|
||||
lastRequestMessages: effectiveBuildPromptResult.messages,
|
||||
availableTools,
|
||||
inlineSummarizationRequested,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -1569,7 +1407,6 @@ export abstract class ToolCallingLoop<TOptions extends IToolCallingLoopOptions =
|
||||
lastRequestMessages: effectiveBuildPromptResult.messages,
|
||||
availableTools,
|
||||
round: new ToolCallRound('', toolCalls, toolInputRetry),
|
||||
inlineSummarizationRequested,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -1582,84 +1419,6 @@ export abstract class ToolCallingLoop<TOptions extends IToolCallingLoopOptions =
|
||||
return toolCallId + `__vscode-${ToolCallingLoop.NextToolCallId++}`;
|
||||
}
|
||||
|
||||
/**
|
||||
* Finds the appropriate round and applies the summary text to it.
|
||||
*
|
||||
* After the summary round is pushed, `toolCallRounds` looks like:
|
||||
* [r0, ..., rK, summaryRound]
|
||||
*
|
||||
* We want to keep the last real tool-call round (rK) verbatim so the model
|
||||
* retains context of its most recent actions. The summary replaces everything
|
||||
* before rK.
|
||||
*
|
||||
* @returns The round ID that was marked with the summary, or `undefined` if
|
||||
* no suitable round was found.
|
||||
*/
|
||||
private applySummaryToRound(summaryText: string): string | undefined {
|
||||
const rounds = this.toolCallRounds;
|
||||
if (rounds.length > 2) {
|
||||
// 3+ rounds: mark the one before the last real round, preserving rK verbatim
|
||||
rounds[rounds.length - 3].summary = summaryText;
|
||||
return rounds[rounds.length - 3].id;
|
||||
} else if (rounds.length > 1) {
|
||||
// 2 rounds (one real + summaryRound): mark the real round
|
||||
rounds[rounds.length - 2].summary = summaryText;
|
||||
return rounds[rounds.length - 2].id;
|
||||
}
|
||||
return undefined;
|
||||
}
|
||||
|
||||
/**
|
||||
* Fires a `summarizedConversationHistory` telemetry event for inline summarization failures,
|
||||
* matching the format of the existing `ConversationHistorySummarizer.sendSummarizationTelemetry()`.
|
||||
*/
|
||||
private _sendInlineSummarizationFailureTelemetry(detailedOutcome: string, response: ChatResponse): void {
|
||||
const history = this.options.conversation.turns.slice(0, -1);
|
||||
const numRoundsInHistory = history.reduce((sum, t) => sum + t.rounds.length, 0);
|
||||
const numRoundsInCurrentTurn = this.toolCallRounds.length;
|
||||
const resolvedModel = response.type === ChatFetchResponseType.Success ? response.resolvedModel : undefined;
|
||||
const requestId = response.type === ChatFetchResponseType.Success ? response.requestId : '';
|
||||
const usage = response.type === ChatFetchResponseType.Success ? response.usage : undefined;
|
||||
|
||||
/* __GDPR__
|
||||
"summarizedConversationHistory" : {
|
||||
"owner": "bhavyau",
|
||||
"comment": "Tracks inline summarization failure",
|
||||
"outcome": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The success state." },
|
||||
"detailedOutcome": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Detailed failure reason." },
|
||||
"model": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The model ID." },
|
||||
"summarizationMode": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The summarization mode." },
|
||||
"source": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Whether background or foreground." },
|
||||
"conversationId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Session id." },
|
||||
"chatRequestId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The chat request ID." },
|
||||
"requestId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The request ID from the summarization call." },
|
||||
"numRounds": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Total tool call rounds." },
|
||||
"turnIndex": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "The index of the current turn." },
|
||||
"curTurnRoundIndex": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "The index of the current round within the current turn." },
|
||||
"promptTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "isMeasurement": true, "comment": "Prompt tokens." },
|
||||
"promptCacheTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "isMeasurement": true, "comment": "Cached prompt tokens." },
|
||||
"responseTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "isMeasurement": true, "comment": "Output tokens." }
|
||||
}
|
||||
*/
|
||||
this._telemetryService.sendMSFTTelemetryEvent('summarizedConversationHistory', {
|
||||
outcome: 'failed',
|
||||
detailedOutcome,
|
||||
model: resolvedModel,
|
||||
summarizationMode: 'inline',
|
||||
source: 'foreground',
|
||||
conversationId: this.options.conversation.sessionId,
|
||||
chatRequestId: this.turn.id,
|
||||
requestId,
|
||||
}, {
|
||||
numRounds: numRoundsInHistory + numRoundsInCurrentTurn,
|
||||
turnIndex: history.length,
|
||||
curTurnRoundIndex: numRoundsInCurrentTurn,
|
||||
promptTokenCount: usage?.prompt_tokens,
|
||||
promptCacheTokenCount: usage?.prompt_tokens_details?.cached_tokens,
|
||||
responseTokenCount: usage?.completion_tokens,
|
||||
});
|
||||
}
|
||||
|
||||
private applyMessagePostProcessing(messages: Raw.ChatMessage[], options?: { stripOrphanedToolCalls?: boolean }): Raw.ChatMessage[] {
|
||||
return this.validateToolMessages(
|
||||
ToolCallingLoop.stripInternalToolCallIds(messages), options);
|
||||
@@ -1879,8 +1638,6 @@ export interface IToolCallSingleResult {
|
||||
hadIgnoredFiles: boolean;
|
||||
lastRequestMessages: Raw.ChatMessage[];
|
||||
availableTools: readonly LanguageModelToolInformation[];
|
||||
/** Set when the prompt included inline summarization instructions. */
|
||||
inlineSummarizationRequested?: boolean;
|
||||
}
|
||||
|
||||
export interface IToolCallLoopResult extends IToolCallSingleResult {
|
||||
|
||||
@@ -687,10 +687,9 @@ class DefaultToolCallingLoop extends ToolCallingLoop<IDefaultToolLoopOptions> {
|
||||
|
||||
protected override async fetch(opts: ToolCallingLoopFetchOptions, token: CancellationToken): Promise<ChatResponse> {
|
||||
const messageSourcePrefix = this.options.location === ChatLocation.Editor ? 'inline' : 'chat';
|
||||
const baseDebugName = this.options.request.subAgentInvocationId ?
|
||||
const debugName = this.options.request.subAgentInvocationId ?
|
||||
`tool/runSubagent${this.options.request.subAgentName ? `-${this.options.request.subAgentName}` : ''}` :
|
||||
`${ChatLocation.toStringShorter(this.options.location)}/${this.options.intent?.id}`;
|
||||
const debugName = this._isInlineSummarizationRequest ? 'inlineSummarizeConversationHistory-full' : baseDebugName;
|
||||
const location = this.options.overrideRequestLocation ?? this.options.location;
|
||||
const isThinkingLocation = location === ChatLocation.Agent || location === ChatLocation.MessagesProxy;
|
||||
const rawEffort = this.options.request.modelConfiguration?.reasoningEffort;
|
||||
|
||||
@@ -56,14 +56,6 @@ export interface AgentPromptProps extends GenericBasePromptElementProps {
|
||||
|
||||
readonly triggerSummarize?: boolean;
|
||||
|
||||
/**
|
||||
* When true, appends a summarization instruction as a user message in the
|
||||
* current agent loop iteration instead of making a separate LLM call.
|
||||
* The model outputs ONLY a summary (no tool calls) and the loop continues
|
||||
* with the compacted history on the next iteration.
|
||||
*/
|
||||
readonly inlineSummarization?: boolean;
|
||||
|
||||
/**
|
||||
* Enables cache breakpoints and summarization
|
||||
*/
|
||||
@@ -151,7 +143,6 @@ export class AgentPrompt extends PromptElement<AgentPromptProps> {
|
||||
<SummarizedConversationHistory
|
||||
flexGrow={1}
|
||||
triggerSummarize={this.props.triggerSummarize}
|
||||
inlineSummarization={this.props.inlineSummarization}
|
||||
priority={900}
|
||||
promptContext={this.props.promptContext}
|
||||
location={this.props.location}
|
||||
|
||||
+7
-27
@@ -18,7 +18,6 @@ import { CUSTOM_TOOL_SEARCH_NAME } from '../../../../platform/networking/common/
|
||||
import { IChatEndpoint } from '../../../../platform/networking/common/networking';
|
||||
import { APIUsage } from '../../../../platform/networking/common/openai';
|
||||
import { IPromptPathRepresentationService } from '../../../../platform/prompts/common/promptPathRepresentationService';
|
||||
import { IExperimentationService } from '../../../../platform/telemetry/common/nullExperimentationService';
|
||||
import { ITelemetryService } from '../../../../platform/telemetry/common/telemetry';
|
||||
import { ThinkingData } from '../../../../platform/thinking/common/thinking';
|
||||
import { computePromptTokenDetails } from '../../../../platform/tokenizer/node/promptTokenDetails';
|
||||
@@ -398,8 +397,6 @@ export interface SummarizedAgentHistoryProps extends BasePromptElementProps, Age
|
||||
readonly location: ChatLocation;
|
||||
readonly promptContext: IBuildPromptContext;
|
||||
readonly triggerSummarize?: boolean;
|
||||
/** When true, appends a summarization instruction in the agent loop instead of a separate LLM call. */
|
||||
readonly inlineSummarization?: boolean;
|
||||
readonly tools?: ReadonlyArray<LanguageModelToolInformation> | undefined;
|
||||
readonly enableCacheBreakpoints?: boolean;
|
||||
readonly workingNotebook?: NotebookDocument;
|
||||
@@ -420,8 +417,6 @@ export class SummarizedConversationHistory extends PromptElement<SummarizedAgent
|
||||
props: SummarizedAgentHistoryProps,
|
||||
@IInstantiationService private readonly instantiationService: IInstantiationService,
|
||||
@ISessionTranscriptService private readonly sessionTranscriptService: ISessionTranscriptService,
|
||||
@IConfigurationService private readonly configurationService: IConfigurationService,
|
||||
@IExperimentationService private readonly experimentationService: IExperimentationService,
|
||||
) {
|
||||
super(props);
|
||||
}
|
||||
@@ -429,12 +424,10 @@ export class SummarizedConversationHistory extends PromptElement<SummarizedAgent
|
||||
override async render(state: void, sizing: PromptSizing, progress: Progress<ChatResponsePart> | undefined, token: CancellationToken | undefined) {
|
||||
const promptContext = { ...this.props.promptContext };
|
||||
let historyMetadata: SummarizedConversationHistoryMetadata | undefined;
|
||||
const transcriptLookupEnabled = this.configurationService.getExperimentBasedConfig(ConfigKey.ConversationTranscriptLookup, this.experimentationService);
|
||||
|
||||
// Resolve transcript path and flush to disk so the model can read the up-to-date file
|
||||
let transcriptPath: string | undefined;
|
||||
const sessionId = this.props.promptContext.conversation?.sessionId;
|
||||
if (transcriptLookupEnabled && sessionId) {
|
||||
if (sessionId) {
|
||||
// Lazily start the transcript session now (before summarization) so it
|
||||
// captures the full pre-compaction conversation. startSession is
|
||||
// idempotent — if hooks already started it, this is a no-op.
|
||||
@@ -479,18 +472,12 @@ export class SummarizedConversationHistory extends PromptElement<SummarizedAgent
|
||||
}
|
||||
}
|
||||
|
||||
// Inline summarization: append instruction as a user message in the agent loop
|
||||
// instead of making a separate LLM call. The model outputs only a summary.
|
||||
const inlineSummarizationRequested = this.props.inlineSummarization && !this.props.triggerSummarize;
|
||||
|
||||
return <>
|
||||
{historyMetadata && <meta value={historyMetadata} />}
|
||||
{inlineSummarizationRequested && <meta value={new InlineSummarizationRequestedMetadata()} />}
|
||||
<ConversationHistory
|
||||
{...this.props}
|
||||
promptContext={promptContext}
|
||||
enableCacheBreakpoints={this.props.enableCacheBreakpoints} />
|
||||
{inlineSummarizationRequested && <InlineSummarizationUserMessage priority={1000} endpoint={this.props.endpoint} />}
|
||||
</>;
|
||||
}
|
||||
|
||||
@@ -687,7 +674,7 @@ class ConversationHistorySummarizer {
|
||||
const budgetExceeded = e instanceof BudgetExceededError;
|
||||
const outcome = budgetExceeded ? 'budget_exceeded' : 'renderError';
|
||||
this.logInfo(`Error rendering summarization prompt in mode: ${mode}. ${e.stack}`, mode);
|
||||
this.sendSummarizationTelemetry(outcome, '', this.props.endpoint.model, mode, stopwatch.elapsed(), undefined);
|
||||
this.sendSummarizationTelemetry(outcome, '', this.props.endpoint.model, mode, stopwatch.elapsed(), undefined, e instanceof Error ? e.message : String(e));
|
||||
throw e;
|
||||
}
|
||||
|
||||
@@ -704,7 +691,7 @@ class ConversationHistorySummarizer {
|
||||
}, type: 'function'
|
||||
})),
|
||||
(tool, rule) => {
|
||||
this.logService.warn(`Tool ${tool} failed validation: ${rule}`);
|
||||
this.logService.warn(`[ConversationHistorySummarizer] Tool ${tool} failed validation: ${rule}`);
|
||||
},
|
||||
) : undefined;
|
||||
const toolOpts = normalizedTools?.length ? {
|
||||
@@ -766,7 +753,7 @@ class ConversationHistorySummarizer {
|
||||
}, this.token ?? CancellationToken.None);
|
||||
} catch (e) {
|
||||
this.logInfo(`Error from summarization request. ${e.message}`, mode);
|
||||
this.sendSummarizationTelemetry('requestThrow', '', this.props.endpoint.model, mode, stopwatch.elapsed(), undefined);
|
||||
this.sendSummarizationTelemetry('requestThrow', '', this.props.endpoint.model, mode, stopwatch.elapsed(), undefined, e instanceof Error ? e.message : String(e));
|
||||
throw e;
|
||||
}
|
||||
|
||||
@@ -806,7 +793,7 @@ class ConversationHistorySummarizer {
|
||||
? Math.min(this.sizing.tokenBudget, this.props.maxSummaryTokens)
|
||||
: this.sizing.tokenBudget;
|
||||
if (summarySize > effectiveBudget) {
|
||||
this.sendSummarizationTelemetry('too_large', response.requestId, this.props.endpoint.model, mode, elapsedTime, response.usage);
|
||||
this.sendSummarizationTelemetry('too_large', response.requestId, this.props.endpoint.model, mode, elapsedTime, response.usage, `${summarySize} tokens exceeds budget ${effectiveBudget}`);
|
||||
this.logInfo(`Summary too large: ${summarySize} tokens (effective budget ${effectiveBudget})`, mode);
|
||||
throw new Error('Summary too large');
|
||||
}
|
||||
@@ -1072,14 +1059,7 @@ class SummaryMessageElement extends PromptElement<SummaryMessageProps> {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Metadata flag indicating that inline summarization was requested in this render.
|
||||
* The caller (agentIntent) checks for this to know the model response should
|
||||
* contain only a summary.
|
||||
*/
|
||||
export class InlineSummarizationRequestedMetadata extends PromptMetadata { }
|
||||
|
||||
interface InlineSummarizationUserMessageProps extends BasePromptElementProps {
|
||||
export interface InlineSummarizationUserMessageProps extends BasePromptElementProps {
|
||||
readonly endpoint: IChatEndpoint;
|
||||
}
|
||||
|
||||
@@ -1089,7 +1069,7 @@ interface InlineSummarizationUserMessageProps extends BasePromptElementProps {
|
||||
* no tool calls. The summary is extracted from the response and stored on the round
|
||||
* for the next iteration.
|
||||
*/
|
||||
class InlineSummarizationUserMessage extends PromptElement<InlineSummarizationUserMessageProps> {
|
||||
export class InlineSummarizationUserMessage extends PromptElement<InlineSummarizationUserMessageProps> {
|
||||
override async render(state: void, sizing: PromptSizing) {
|
||||
const isOpus = this.props.endpoint.model.startsWith('claude-opus');
|
||||
return <UserMessage priority={1000}>
|
||||
|
||||
@@ -30,7 +30,7 @@ import { ToolName } from '../../../../tools/common/toolNames';
|
||||
import { PromptRenderer } from '../../base/promptRenderer';
|
||||
import { AgentPrompt, AgentPromptProps } from '../agentPrompt';
|
||||
import { PromptRegistry } from '../promptRegistry';
|
||||
import { ConversationHistorySummarizationPrompt, extractInlineSummary, InlineSummarizationRequestedMetadata, stripToolSearchMessages, SummarizedConversationHistory, SummarizedConversationHistoryMetadata, SummarizedConversationHistoryPropsBuilder } from '../summarizedConversationHistory';
|
||||
import { ConversationHistorySummarizationPrompt, extractInlineSummary, stripToolSearchMessages, SummarizedConversationHistory, SummarizedConversationHistoryMetadata, SummarizedConversationHistoryPropsBuilder } from '../summarizedConversationHistory';
|
||||
|
||||
suite('Agent Summarization', () => {
|
||||
let accessor: ITestingServicesAccessor;
|
||||
@@ -582,162 +582,6 @@ suite('extractInlineSummary', () => {
|
||||
});
|
||||
});
|
||||
|
||||
suite('Inline Summarization Prompt', () => {
|
||||
let accessor: ITestingServicesAccessor;
|
||||
|
||||
beforeAll(() => {
|
||||
const services = createExtensionUnitTestingServices();
|
||||
services.define(IWorkspaceService, new SyncDescriptor(
|
||||
TestWorkspaceService,
|
||||
[
|
||||
[URI.file('/workspace')],
|
||||
[]
|
||||
]
|
||||
));
|
||||
services.define(IChatMLFetcher, new StaticChatMLFetcher([]));
|
||||
accessor = services.createTestingAccessor();
|
||||
});
|
||||
|
||||
afterAll(() => {
|
||||
accessor.dispose();
|
||||
});
|
||||
|
||||
test('inlineSummarization=true appends summarization user message and metadata', async () => {
|
||||
const instaService = accessor.get(IInstantiationService);
|
||||
const endpoint = instaService.createInstance(MockEndpoint, undefined);
|
||||
const turn = new Turn('turnId', { type: 'user', message: 'hello' });
|
||||
const conversation = new Conversation('sessionId', [turn]);
|
||||
|
||||
const firstTurn = new Turn('id1', { type: 'user', message: 'previous turn message' });
|
||||
firstTurn.setResponse(TurnStatus.Success, { type: 'user', message: 'response' }, 'responseId', {
|
||||
metadata: {
|
||||
toolCallRounds: [
|
||||
new ToolCallRound('ok', [{
|
||||
id: 'tooluse_1',
|
||||
name: ToolName.EditFile,
|
||||
arguments: JSON.stringify({ filePath: '/workspace/file.ts', code: 'test' })
|
||||
}]),
|
||||
]
|
||||
}
|
||||
} as ICopilotChatResultIn);
|
||||
|
||||
const promptContext: IBuildPromptContext = {
|
||||
chatVariables: new ChatVariablesCollection([]),
|
||||
history: [firstTurn],
|
||||
query: 'continue',
|
||||
toolCallRounds: [
|
||||
new ToolCallRound('ok 2', [{
|
||||
id: 'tooluse_2',
|
||||
name: ToolName.EditFile,
|
||||
arguments: JSON.stringify({ filePath: '/workspace/file.ts', code: 'test2' })
|
||||
}]),
|
||||
],
|
||||
toolCallResults: {
|
||||
'tooluse_2': new LanguageModelToolResult([new LanguageModelTextPart('success')]),
|
||||
},
|
||||
tools: {
|
||||
availableTools: [],
|
||||
toolInvocationToken: null as never,
|
||||
toolReferences: [],
|
||||
},
|
||||
conversation,
|
||||
};
|
||||
|
||||
const customizations = await PromptRegistry.resolveAllCustomizations(instaService, endpoint);
|
||||
const props: AgentPromptProps = {
|
||||
priority: 1,
|
||||
endpoint,
|
||||
location: ChatLocation.Panel,
|
||||
promptContext,
|
||||
enableCacheBreakpoints: true,
|
||||
inlineSummarization: true,
|
||||
customizations,
|
||||
};
|
||||
|
||||
const renderer = PromptRenderer.create(instaService, endpoint, AgentPrompt, props);
|
||||
const result = await renderer.render();
|
||||
|
||||
// Should have InlineSummarizationRequestedMetadata set
|
||||
const inlineMeta = result.metadata.get(InlineSummarizationRequestedMetadata);
|
||||
expect(inlineMeta).toBeDefined();
|
||||
|
||||
// The last user message should contain summarization instructions
|
||||
const userMessages = result.messages.filter(m => m.role === Raw.ChatRole.User);
|
||||
const lastUserMessage = userMessages[userMessages.length - 1];
|
||||
const lastMessageText = lastUserMessage.content.map(c => 'text' in c ? c.text : '').join('');
|
||||
expect(lastMessageText).toContain('summary');
|
||||
expect(lastMessageText).toContain('Do NOT call any tools');
|
||||
|
||||
// Should NOT have the separate-call summarization metadata
|
||||
const summaryMeta = result.metadata.get(SummarizedConversationHistoryMetadata);
|
||||
expect(summaryMeta).toBeUndefined();
|
||||
});
|
||||
|
||||
test('inlineSummarization=true sets metadata when triggerSummarize is false', async () => {
|
||||
const instaService = accessor.get(IInstantiationService);
|
||||
const endpoint = instaService.createInstance(MockEndpoint, undefined);
|
||||
|
||||
const firstTurn = new Turn('id1', { type: 'user', message: 'previous turn message' });
|
||||
firstTurn.setResponse(TurnStatus.Success, { type: 'user', message: 'response' }, 'responseId', {
|
||||
metadata: {
|
||||
toolCallRounds: [
|
||||
new ToolCallRound('ok', [{
|
||||
id: 'tooluse_1',
|
||||
name: ToolName.EditFile,
|
||||
arguments: JSON.stringify({ filePath: '/workspace/file.ts', code: 'test' })
|
||||
}]),
|
||||
]
|
||||
}
|
||||
} as ICopilotChatResultIn);
|
||||
|
||||
const promptContext: IBuildPromptContext = {
|
||||
chatVariables: new ChatVariablesCollection([]),
|
||||
history: [firstTurn],
|
||||
query: 'continue',
|
||||
toolCallRounds: [
|
||||
new ToolCallRound('ok 2', [{
|
||||
id: 'tooluse_2',
|
||||
name: ToolName.EditFile,
|
||||
arguments: JSON.stringify({ filePath: '/workspace/file.ts', code: 'test2' })
|
||||
}]),
|
||||
],
|
||||
toolCallResults: {
|
||||
'tooluse_2': new LanguageModelToolResult([new LanguageModelTextPart('success')]),
|
||||
},
|
||||
tools: {
|
||||
availableTools: [],
|
||||
toolInvocationToken: null as never,
|
||||
toolReferences: [],
|
||||
},
|
||||
};
|
||||
|
||||
// When both triggerSummarize and inlineSummarization are true,
|
||||
// triggerSummarize should take precedence (inlineSummarization condition
|
||||
// requires triggerSummarize to be false).
|
||||
// We test this indirectly: inlineSummarization=true with triggerSummarize=false
|
||||
// should set InlineSummarizationRequestedMetadata, but if triggerSummarize were
|
||||
// also true, the inline path would be skipped.
|
||||
const customizations = await PromptRegistry.resolveAllCustomizations(instaService, endpoint);
|
||||
const propsInlineOnly: AgentPromptProps = {
|
||||
priority: 1,
|
||||
endpoint,
|
||||
location: ChatLocation.Panel,
|
||||
promptContext,
|
||||
enableCacheBreakpoints: true,
|
||||
triggerSummarize: false,
|
||||
inlineSummarization: true,
|
||||
customizations,
|
||||
};
|
||||
|
||||
const renderer = PromptRenderer.create(instaService, endpoint, AgentPrompt, propsInlineOnly);
|
||||
const result = await renderer.render();
|
||||
|
||||
// Inline metadata should be set when triggerSummarize is false
|
||||
const inlineMeta = result.metadata.get(InlineSummarizationRequestedMetadata);
|
||||
expect(inlineMeta).toBeDefined();
|
||||
});
|
||||
});
|
||||
|
||||
suite('stripToolSearchMessages', () => {
|
||||
function makeAssistantMessage(toolCalls: { id: string; name: string }[], text = 'response'): Raw.ChatMessage {
|
||||
return {
|
||||
|
||||
@@ -965,8 +965,6 @@ export namespace ConfigKey {
|
||||
export const NewWorkspaceCreationAgentEnabled = defineSetting<boolean>('chat.newWorkspaceCreation.enabled', ConfigType.Simple, true);
|
||||
export const NewWorkspaceUseContext7 = defineSetting<boolean>('chat.newWorkspace.useContext7', ConfigType.Simple, false);
|
||||
export const SummarizeAgentConversationHistory = defineSetting<boolean>('chat.summarizeAgentConversationHistory.enabled', ConfigType.Simple, true);
|
||||
export const ConversationTranscriptLookup = defineSetting<boolean>('chat.conversationTranscriptLookup.enabled', ConfigType.ExperimentBased, false);
|
||||
export const BackgroundCompaction = defineSetting<boolean>('chat.backgroundCompaction', ConfigType.ExperimentBased, false);
|
||||
export const VirtualToolThreshold = defineSetting<number>('chat.virtualTools.threshold', ConfigType.ExperimentBased, HARD_TOOL_LIMIT);
|
||||
export const CurrentEditorAgentContext = defineSetting<boolean>('chat.agent.currentEditorContext.enabled', ConfigType.Simple, true);
|
||||
/** BYOK */
|
||||
|
||||
Reference in New Issue
Block a user