Background inline summarization v1 (#308923)

* Refactor inline summarization handling in ToolCallingLoop
* Refactor conversation summarization settings and improve logging in AgentIntent
* Refactor agent intent to improve telemetry and remove obsolete test file
* Refactor inline summarization handling: remove unused properties and related tests
* Remove unused summarization instruction from AgentPromptProps interface
* Refactor AgentIntentInvocation to streamline model capabilities handling in background summarization
* Update debugName for background summarization to reflect inline context
* Update logging message in AgentIntentInvocation for clarity and remove unused test suite for inline summarization
2026-05-17 22:00:59 +01:00 · 2026-04-09 20:16:57 -07:00
parent ce9a6650d4
commit 100cbe59eb
9 changed files with 283 additions and 647 deletions
@@ -3224,15 +3224,6 @@
 							"onExp"
 						]
 					},
-					"github.copilot.chat.backgroundCompaction": {
-						"type": "boolean",
-						"default": false,
-						"markdownDescription": "%github.copilot.config.backgroundCompaction%",
-						"tags": [
-							"preview",
-							"onExp"
-						]
-					},
 					"github.copilot.chat.anthropic.toolSearchTool.enabled": {
 						"type": "boolean",
 						"default": true,
@@ -3253,15 +3244,6 @@
 							"preview",
 							"onExp"
 						]
-					},
-					"github.copilot.chat.conversationTranscriptLookup.enabled": {
-						"type": "boolean",
-						"default": false,
-						"description": "%github.copilot.config.conversationTranscriptLookup.enabled%",
-						"tags": [
-							"preview",
-							"onExp"
-						]
 					}
 				}
 			},
@@ -263,7 +263,6 @@
 	"github.copilot.config.editsNewNotebook.enabled": "Whether to enable the new notebook tool in Copilot Edits.",
 	"github.copilot.config.notebook.inlineEditAgent.enabled": "Enable agent-like behavior from the notebook inline chat widget.",
 	"github.copilot.config.summarizeAgentConversationHistory.enabled": "Whether to auto-compact agent conversation history once the context window is filled.",
-	"github.copilot.config.conversationTranscriptLookup.enabled": "When enabled, after conversation history is summarized the model is informed it can look up the full conversation transcript via read_file.",
 	"github.copilot.tools.createNewWorkspace.name": "Create New Workspace",
 	"github.copilot.tools.openEmptyFolder.name": "Open an empty folder as VS Code workspace",
 	"github.copilot.tools.getProjectSetupInfo.name": "Get Project Setup Info",
@@ -391,9 +390,7 @@
 	"github.copilot.config.instantApply.shortContextLimit": "Token limit for short context instant apply.",
 	"github.copilot.config.summarizeAgentConversationHistoryThreshold": "Threshold for compacting agent conversation history.",
 	"github.copilot.config.agentHistorySummarizationMode": "Mode for agent history summarization.",
-	"github.copilot.config.backgroundCompaction": "Enable background compaction of conversation history.",
 	"github.copilot.config.agentHistorySummarizationInline": "Summarize conversation inline within the agent loop instead of a separate LLM call, maximizing prompt cache hits.",
-
 	"github.copilot.config.useResponsesApiTruncation": "Use Responses API for truncation.",
 	"github.copilot.config.enableReadFileV2": "Enable version 2 of the read file tool.",
 	"github.copilot.config.enableAskAgent": "Enable the Ask agent for answering questions.",
@@ -8,7 +8,7 @@ import { Raw, RenderPromptResult } from '@vscode/prompt-tsx';
 import { BudgetExceededError } from '@vscode/prompt-tsx/dist/base/materialized';
 import type * as vscode from 'vscode';
 import { IChatSessionService } from '../../../platform/chat/common/chatSessionService';
-import { ChatLocation, ChatResponse } from '../../../platform/chat/common/commonTypes';
+import { ChatFetchResponseType, ChatLocation, ChatResponse } from '../../../platform/chat/common/commonTypes';
 import { ConfigKey, IConfigurationService } from '../../../platform/configuration/common/configurationService';
 import { isAnthropicFamily, isGptFamily, modelCanUseApplyPatchExclusively, modelCanUseReplaceStringExclusively, modelSupportsApplyPatch, modelSupportsMultiReplaceString, modelSupportsReplaceString, modelSupportsSimplifiedApplyPatchInstructions } from '../../../platform/endpoint/common/chatModelCapabilities';
 import { IEndpointProvider } from '../../../platform/endpoint/common/endpointProvider';
@@ -16,7 +16,7 @@ import { IAutomodeService } from '../../../platform/endpoint/node/automodeServic
 import { IEnvService } from '../../../platform/env/common/envService';
 import { ILogService } from '../../../platform/log/common/logService';
 import { IEditLogService } from '../../../platform/multiFileEdit/common/editLogService';
-import { CUSTOM_TOOL_SEARCH_NAME, isAnthropicCustomToolSearchEnabled, isAnthropicToolSearchEnabled } from '../../../platform/networking/common/anthropic';
+import { CUSTOM_TOOL_SEARCH_NAME, isAnthropicCustomToolSearchEnabled, isAnthropicContextEditingEnabled, isAnthropicToolSearchEnabled } from '../../../platform/networking/common/anthropic';
 import { IChatEndpoint } from '../../../platform/networking/common/networking';
 import { modelsWithoutResponsesContextManagement } from '../../../platform/networking/common/openai';
 import { INotebookService } from '../../../platform/notebook/common/notebookService';
@@ -47,13 +47,14 @@ import { IBuildPromptResult, IIntent, IIntentInvocation } from '../../prompt/nod
 import { AgentPrompt, AgentPromptProps } from '../../prompts/node/agent/agentPrompt';
 import { BackgroundSummarizationState, BackgroundSummarizer, IBackgroundSummarizationResult } from '../../prompts/node/agent/backgroundSummarizer';
 import { AgentPromptCustomizations, PromptRegistry } from '../../prompts/node/agent/promptRegistry';
-import { SummarizedConversationHistory, SummarizedConversationHistoryMetadata, SummarizedConversationHistoryPropsBuilder } from '../../prompts/node/agent/summarizedConversationHistory';
-import { PromptRenderer } from '../../prompts/node/base/promptRenderer';
+import { extractInlineSummary, InlineSummarizationUserMessage, SummarizedConversationHistory, SummarizedConversationHistoryMetadata, SummarizedConversationHistoryPropsBuilder } from '../../prompts/node/agent/summarizedConversationHistory';
+import { PromptRenderer, renderPromptElement } from '../../prompts/node/base/promptRenderer';
 import { ICodeMapperService } from '../../prompts/node/codeMapper/codeMapperService';
 import { EditCodePrompt2 } from '../../prompts/node/panel/editCodePrompt2';
 import { NotebookInlinePrompt } from '../../prompts/node/panel/notebookInlinePrompt';
 import { ToolResultMetadata } from '../../prompts/node/panel/toolCalling';
 import { IEditToolLearningService } from '../../tools/common/editToolLearningService';
+import { normalizeToolSchema } from '../../tools/common/toolSchemaNormalizer';
 import { ContributedToolName, ToolName } from '../../tools/common/toolNames';
 import { IToolsService } from '../../tools/common/toolsService';
 import { applyPatch5Description } from '../../tools/node/applyPatchTool';
@@ -62,8 +63,7 @@ import { replaceStringBatchDescription } from '../../tools/node/replaceStringToo
 import { getAgentMaxRequests } from '../common/agentConfig';
 import { addCacheBreakpoints } from './cacheBreakpoints';
 import { EditCodeIntent, EditCodeIntentInvocation, EditCodeIntentInvocationOptions, mergeMetadata, toNewChatReferences } from './editCodeIntent';
-
-const INLINE_SUMMARIZATION_BUDGET_EXPANSION = 1.15;
+import { ToolCallingLoop } from './toolCallingLoop';

 function isResponsesCompactionContextManagementEnabled(endpoint: IChatEndpoint, configurationService: IConfigurationService, experimentationService: IExperimentationService): boolean {
 	return endpoint.apiType === 'responses'
@@ -356,6 +356,9 @@ export class AgentIntentInvocation extends EditCodeIntentInvocation implements I

 	private _lastRenderTokenCount: number = 0;

+	/** Cached model capabilities from the most recent main agent render, reused by the background summarizer. */
+	private _lastModelCapabilities: { enableThinking: boolean; reasoningEffort: string | undefined; enableToolSearch: boolean; enableContextEditing: boolean } | undefined;
+
 	constructor(
 		intent: IIntent,
 		location: ChatLocation,
@@ -418,9 +421,7 @@ export class AgentIntentInvocation extends EditCodeIntentInvocation implements I
 		const useTruncation = this.endpoint.apiType === 'responses' && this.configurationService.getConfig(ConfigKey.Advanced.UseResponsesApiTruncation);
 		const responsesCompactionContextManagementEnabled = isResponsesCompactionContextManagementEnabled(this.endpoint, this.configurationService, this.expService);
 		const summarizationEnabled = this.configurationService.getConfig(ConfigKey.SummarizeAgentConversationHistory) && this.prompt === AgentPrompt && !responsesCompactionContextManagementEnabled;
-		const inlineSummarizationEnabled = summarizationEnabled && this.configurationService.getExperimentBasedConfig(ConfigKey.Advanced.AgentHistorySummarizationInline, this.expService);
-		// Disable background compaction when inline summarization is active — they solve the same problem
-		const backgroundCompactionEnabled = summarizationEnabled && !inlineSummarizationEnabled && this.configurationService.getExperimentBasedConfig(ConfigKey.BackgroundCompaction, this.expService);
+		const useInlineSummarization = summarizationEnabled && this.configurationService.getExperimentBasedConfig(ConfigKey.Advanced.AgentHistorySummarizationInline, this.expService);

 		// When tools are present, apply a 10% safety margin on the message portion
 		// to account for tokenizer discrepancies between our tool-token counter and
@@ -432,7 +433,7 @@ export class AgentIntentInvocation extends EditCodeIntentInvocation implements I
 		const safeBudget = useTruncation ? Number.MAX_SAFE_INTEGER : messageBudget;
 		const endpoint = toolTokens > 0 ? this.endpoint.cloneWithTokenOverride(safeBudget) : this.endpoint;

-		this.logService.debug(`AgentIntent: rendering with budget=${safeBudget} (baseBudget: ${baseBudget}, toolTokens: ${toolTokens}, totalTools: ${tools?.length ?? 0}, toolSearchEnabled: ${toolSearchEnabled}), summarizationEnabled=${summarizationEnabled}`);
+		this.logService.debug(`[Agent] rendering with budget=${safeBudget} (baseBudget: ${baseBudget}, toolTokens: ${toolTokens}, totalTools: ${tools?.length ?? 0}, toolSearchEnabled: ${toolSearchEnabled}), summarizationEnabled=${summarizationEnabled}`);
 		let result: RenderPromptResult;
 		const props: AgentPromptProps = {
 			endpoint,
@@ -449,94 +450,42 @@ export class AgentIntentInvocation extends EditCodeIntentInvocation implements I
 			customizations: this._resolvedCustomizations
 		};

-		// ── Background compaction: dual-threshold approach ────────────────
+		// ── Background compaction ────────────────────────────────────────
 		//
-		// Background compaction thresholds (checked post-render using the
-		// actual tokenCount from the current render):
+		//   Pre-render: if a previous bg pass completed, apply it now.
 		//
-		//   Completed (previous bg pass)  → apply the summary before rendering.
+		//   BudgetExceeded: if bg is InProgress/Completed, wait/apply.
+		//                   Otherwise fall back to foreground summarization.
 		//
-		//   ≥ 95% + InProgress             → block on the background compaction
-		//                                    completing, then apply before rendering.
+		//   Post-render (≥ 80% + Idle/Failed): kick off background compaction
+		//                                       so it is ready for a future turn.
 		//
-		//   ≥ 80% + Idle (post-render)     → kick off background compaction so
-		//                                    it is ready for a future iteration.
-		//
-		const backgroundSummarizer = backgroundCompactionEnabled ? this._getOrCreateBackgroundSummarizer(promptContext.conversation?.sessionId) : undefined;
+		const backgroundSummarizer = summarizationEnabled ? this._getOrCreateBackgroundSummarizer(promptContext.conversation?.sessionId) : undefined;
 		const contextRatio = backgroundSummarizer && baseBudget > 0
 			? (this._lastRenderTokenCount + toolTokens) / baseBudget
 			: 0;

-		// ── Proactive inline summarization: pre-render check ──────────────
-		// Use _lastRenderTokenCount (from the previous iteration) to decide
-		// whether to append the summarize instruction *before* the main
-		// render, avoiding a wasteful double-render.
-		// Guard: skip when a summary was already stored on the current or
-		// most-recent history turn — _lastRenderTokenCount is stale from the
-		// summarization render and would falsely re-trigger.
-		let proactiveInlineSummarization = false;
-		if (inlineSummarizationEnabled && baseBudget > 0) {
-			const hasRecentSummary = promptContext.toolCallRounds?.some(r => r.summary)
-				|| promptContext.history.at(-1)?.rounds.some(r => r.summary);
-			if (!hasRecentSummary) {
-				const preRenderRatio = (this._lastRenderTokenCount + toolTokens) / baseBudget;
-				if (preRenderRatio >= 0.85) {
-					this.logService.debug(`[Agent] pre-render at ${(preRenderRatio * 100).toFixed(0)}% — proactively enabling inline summarization`);
-					proactiveInlineSummarization = true;
-				}
-			}
-		}
-
 		// Track whether we applied a summary in this iteration so we don't
 		// immediately re-trigger background compaction in the post-render check.
 		let summaryAppliedThisIteration = false;

-		// 1. If a previous background pass completed, apply its summary now.
-		if (backgroundCompactionEnabled && backgroundSummarizer?.state === BackgroundSummarizationState.Completed) {
+		// If a previous background pass completed, apply its summary now.
+		if (summarizationEnabled && backgroundSummarizer?.state === BackgroundSummarizationState.Completed) {
 			const bgResult = backgroundSummarizer.consumeAndReset();
 			if (bgResult) {
-				this.logService.debug(`[Agent] applying completed background summary (roundId=${bgResult.toolCallRoundId})`);
+				this.logService.debug(`[ConversationHistorySummarizer] applying completed background summary (roundId=${bgResult.toolCallRoundId})`);
 				progress.report(new ChatResponseProgressPart2(l10n.t('Compacted conversation'), async () => l10n.t('Compacted conversation')));
 				this._applySummaryToRounds(bgResult, promptContext);
 				this._persistSummaryOnTurn(bgResult, promptContext, this._lastRenderTokenCount);
 				this._sendBackgroundCompactionTelemetry('preRender', 'applied', contextRatio, promptContext);
 				summaryAppliedThisIteration = true;
 			} else {
-				this.logService.warn(`[Agent] background compaction state was Completed but consumeAndReset returned no result`);
+				this.logService.warn(`[ConversationHistorySummarizer] background compaction state was Completed but consumeAndReset returned no result`);
 				this._sendBackgroundCompactionTelemetry('preRender', 'noResult', contextRatio, promptContext);
 				this._recordBackgroundCompactionFailure(promptContext, 'preRender');
 			}
 		}

-		// 2. At ≥ 95% — block and wait for the in-progress compaction,
-		//    then apply the result before rendering.
-		if (backgroundCompactionEnabled && backgroundSummarizer && contextRatio >= 0.95 && backgroundSummarizer.state === BackgroundSummarizationState.InProgress) {
-			this.logService.debug(`[Agent] context at ${(contextRatio * 100).toFixed(0)}% — blocking on background compaction`);
-			const summaryPromise = backgroundSummarizer.waitForCompletion();
-			progress.report(new ChatResponseProgressPart2(l10n.t('Compacting conversation...'), async () => {
-				try { await summaryPromise; } catch { }
-				return l10n.t('Compacted conversation');
-			}));
-			await summaryPromise;
-			const bgResult = backgroundSummarizer.consumeAndReset();
-			if (bgResult) {
-				this.logService.debug(`[Agent] background compaction completed — applying result (roundId=${bgResult.toolCallRoundId})`);
-				this._applySummaryToRounds(bgResult, promptContext);
-				this._persistSummaryOnTurn(bgResult, promptContext, this._lastRenderTokenCount);
-				this._sendBackgroundCompactionTelemetry('preRenderBlocked', 'applied', contextRatio, promptContext);
-				summaryAppliedThisIteration = true;
-			} else {
-				this.logService.debug(`[Agent] background compaction finished but produced no usable result — will attempt foreground summarization if budget exceeded`);
-				this._sendBackgroundCompactionTelemetry('preRenderBlocked', 'noResult', contextRatio, promptContext);
-				this._recordBackgroundCompactionFailure(promptContext, 'preRenderBlocked');
-				// Don't attempt a foreground fallback here — the main render below
-				// will either succeed (context estimate was pessimistic) or throw
-				// BudgetExceededError, which the catch block handles with foreground
-				// summarization. Short-circuiting here would skip the main render
-				// unnecessarily when it might still fit.
-			}
-		}
-
 		// Render the prompt without summarization or cache breakpoints, using
 		// the original endpoint (not reduced for tools/safety buffer).
 		const renderWithoutSummarization = async (reason: string, renderProps: AgentPromptProps = props): Promise<RenderPromptResult> => {
@@ -567,7 +516,7 @@ export class AgentIntentInvocation extends EditCodeIntentInvocation implements I
 			const turn = promptContext.conversation?.getLatestTurn();
 			const previousForegroundSummary = turn?.getMetadata(SummarizedConversationHistoryMetadata);
 			if (previousForegroundSummary?.source === 'foreground' && previousForegroundSummary.outcome && previousForegroundSummary.outcome !== 'success') {
-				this.logService.debug(`[Agent] ${reason}, skipping repeated foreground summarization after prior failure (${previousForegroundSummary.outcome})`);
+				this.logService.debug(`[ConversationHistorySummarizer] ${reason}, skipping repeated foreground summarization after prior failure (${previousForegroundSummary.outcome})`);
 				/* __GDPR__
 					"triggerSummarizeSkipped" : {
 						"owner": "bhavyau",
@@ -581,7 +530,7 @@ export class AgentIntentInvocation extends EditCodeIntentInvocation implements I
 				return renderWithoutSummarization(`skipping repeated foreground summarization after prior failure (${previousForegroundSummary.outcome})`, renderProps);
 			}

-			this.logService.debug(`[Agent] ${reason}, triggering summarization`);
+			this.logService.debug(`[ConversationHistorySummarizer] ${reason}, triggering summarization`);
 			try {
 				const renderer = PromptRenderer.create(this.instantiationService, this.endpoint, this.prompt, {
 					...renderProps,
@@ -591,7 +540,7 @@ export class AgentIntentInvocation extends EditCodeIntentInvocation implements I
 				});
 				return await renderer.render(progress, token);
 			} catch (e) {
-				this.logService.error(e, `[Agent] summarization failed`);
+				this.logService.error(e, `[ConversationHistorySummarizer] summarization failed`);
 				const errorKind = e instanceof BudgetExceededError ? 'budgetExceeded' : 'error';
 				/* __GDPR__
 					"triggerSummarizeFailed" : {
@@ -621,36 +570,10 @@ export class AgentIntentInvocation extends EditCodeIntentInvocation implements I
 			}
 		};

-		// Helper function for inline summarization — appends summarize instruction
-		// as a user message in the agent loop instead of making a separate LLM call.
-		// Returns the render result with InlineSummarizationRequestedMetadata set.
-		const renderWithInlineSummarization = async (reason: string, renderProps: AgentPromptProps = props): Promise<RenderPromptResult> => {
-			this.logService.debug(`[Agent] ${reason}, triggering inline summarization`);
-			try {
-				// Expand from the *base* endpoint (not renderProps.endpoint which may already be expanded)
-				const expandedEndpoint = endpoint.cloneWithTokenOverride(endpoint.modelMaxPromptTokens * INLINE_SUMMARIZATION_BUDGET_EXPANSION);
-				const renderer = PromptRenderer.create(this.instantiationService, expandedEndpoint, this.prompt, {
-					...renderProps,
-					endpoint: expandedEndpoint,
-					inlineSummarization: true,
-				});
-				return await renderer.render(progress, token);
-			} catch (e) {
-				this.logService.error(e, `[Agent] inline summarization render failed, falling back to separate-call summarization`);
-				return await renderWithSummarization(`inline summarization failed (${e instanceof Error ? e.message : e}), falling back`, renderProps);
-			}
-		};
-
 		const contextLengthBefore = this._lastRenderTokenCount;

 		try {
-			const renderEndpoint = proactiveInlineSummarization
-				? endpoint.cloneWithTokenOverride(endpoint.modelMaxPromptTokens * INLINE_SUMMARIZATION_BUDGET_EXPANSION)
-				: endpoint;
-			const renderProps: AgentPromptProps = proactiveInlineSummarization
-				? { ...props, endpoint: renderEndpoint, inlineSummarization: true }
-				: props;
-			const renderer = PromptRenderer.create(this.instantiationService, renderEndpoint, this.prompt, renderProps);
+			const renderer = PromptRenderer.create(this.instantiationService, endpoint, this.prompt, props);
 			result = await renderer.render(progress, token);
 		} catch (e) {
 			if (e instanceof BudgetExceededError && summarizationEnabled) {
@@ -670,7 +593,7 @@ export class AgentIntentInvocation extends EditCodeIntentInvocation implements I
 					let budgetExceededTrigger: string;
 					if (backgroundSummarizer.state === BackgroundSummarizationState.InProgress) {
 						budgetExceededTrigger = 'budgetExceededWaited';
-						this.logService.debug(`[Agent] budget exceeded — waiting on in-progress background compaction instead of new request`);
+						this.logService.debug(`[ConversationHistorySummarizer] budget exceeded — waiting on in-progress background compaction instead of new request`);
 						const summaryPromise = backgroundSummarizer.waitForCompletion();
 						progress.report(new ChatResponseProgressPart2(l10n.t('Compacting conversation...'), async () => {
 							try { await summaryPromise; } catch { }
@@ -679,12 +602,12 @@ export class AgentIntentInvocation extends EditCodeIntentInvocation implements I
 						await summaryPromise;
 					} else {
 						budgetExceededTrigger = 'budgetExceededReady';
-						this.logService.debug(`[Agent] budget exceeded — applying already-completed background compaction`);
+						this.logService.debug(`[ConversationHistorySummarizer] budget exceeded — applying already-completed background compaction`);
 						progress.report(new ChatResponseProgressPart2(l10n.t('Compacted conversation'), async () => l10n.t('Compacted conversation')));
 					}
 					const bgResult = backgroundSummarizer.consumeAndReset();
 					if (bgResult) {
-						this.logService.debug(`[Agent] background compaction applied after budget exceeded (roundId=${bgResult.toolCallRoundId})`);
+						this.logService.debug(`[ConversationHistorySummarizer] background compaction applied after budget exceeded (roundId=${bgResult.toolCallRoundId})`);
 						this._applySummaryToRounds(bgResult, promptContext);
 						this._persistSummaryOnTurn(bgResult, promptContext, contextLengthBefore);
 						this._sendBackgroundCompactionTelemetry(budgetExceededTrigger, 'applied', contextRatio, promptContext);
@@ -693,14 +616,12 @@ export class AgentIntentInvocation extends EditCodeIntentInvocation implements I
 						const renderer = PromptRenderer.create(this.instantiationService, endpoint, this.prompt, { ...props, promptContext });
 						result = await renderer.render(progress, token);
 					} else {
-						this.logService.debug(`[Agent] background compaction produced no usable result after budget exceeded — falling back to synchronous summarization`);
+						this.logService.debug(`[ConversationHistorySummarizer] background compaction produced no usable result after budget exceeded — falling back to synchronous summarization`);
 						this._sendBackgroundCompactionTelemetry(budgetExceededTrigger, 'noResult', contextRatio, promptContext);
 						this._recordBackgroundCompactionFailure(promptContext, budgetExceededTrigger);
 						// Background compaction failed — fall back to synchronous summarization
 						result = await renderWithSummarization(`budget exceeded(${e.message}), background compaction failed`);
 					}
-				} else if (inlineSummarizationEnabled) {
-					result = await renderWithInlineSummarization(`budget exceeded(${e.message})`);
 				} else {
 					result = await renderWithSummarization(`budget exceeded(${e.message})`);
 				}
@@ -734,47 +655,27 @@ export class AgentIntentInvocation extends EditCodeIntentInvocation implements I
 			));
 		}

-		// 3. Post-render background compaction checks.
-		if (backgroundCompactionEnabled && backgroundSummarizer && !summaryAppliedThisIteration) {
+		// Post-render: kick off background compaction at ≥ 80% if idle or after a previous failure.
+		if (summarizationEnabled && backgroundSummarizer && !summaryAppliedThisIteration) {
 			const postRenderRatio = baseBudget > 0
 				? (result.tokenCount + toolTokens) / baseBudget
 				: 0;

-			if (postRenderRatio >= 0.95 && backgroundSummarizer.state === BackgroundSummarizationState.InProgress) {
-				// At ≥ 95% with a background compaction already running — block,
-				// wait for it, apply the result, and re-render so the LLM gets
-				// the compacted prompt instead of the oversized one.
-				this.logService.debug(`[Agent] post-render at ${(postRenderRatio * 100).toFixed(0)}% — blocking on in-progress background compaction`);
-				const summaryPromise = backgroundSummarizer.waitForCompletion();
-				progress.report(new ChatResponseProgressPart2(l10n.t('Compacting conversation...'), async () => {
-					try { await summaryPromise; } catch { }
-					return l10n.t('Compacted conversation');
-				}));
-				await summaryPromise;
-				const bgResult = backgroundSummarizer.consumeAndReset();
-				if (bgResult) {
-					this.logService.debug(`[Agent] post-render background compaction completed — applying result and re-rendering (roundId=${bgResult.toolCallRoundId})`);
-					this._applySummaryToRounds(bgResult, promptContext);
-					this._persistSummaryOnTurn(bgResult, promptContext, result.tokenCount);
-					this._sendBackgroundCompactionTelemetry('postRenderBlocked', 'applied', postRenderRatio, promptContext);
-					// Re-render with compacted history so the LLM receives the smaller prompt
-					const reRenderer = PromptRenderer.create(this.instantiationService, endpoint, this.prompt, { ...props, promptContext });
-					result = await reRenderer.render(progress, token);
-					this._lastRenderTokenCount = result.tokenCount;
-				} else {
-					this.logService.debug(`[Agent] post-render background compaction finished but produced no usable result — falling back to foreground summarization`);
-					this._sendBackgroundCompactionTelemetry('postRenderBlocked', 'noResult', postRenderRatio, promptContext);
-					this._recordBackgroundCompactionFailure(promptContext, 'postRenderBlocked');
-					try {
-						result = await renderWithSummarization('post-render background compaction noResult fallback');
-						this._lastRenderTokenCount = result.tokenCount;
-					} catch (e) {
-						this.logService.error(e, `[Agent] post-render foreground summarization fallback also failed — using original render result`);
-					}
+			if (postRenderRatio >= 0.80 && (backgroundSummarizer.state === BackgroundSummarizationState.Idle || backgroundSummarizer.state === BackgroundSummarizationState.Failed)) {
+				if (useInlineSummarization) {
+					// Compute and cache model capabilities from the current render's
+					// messages. These must match the main agent fetch for cache parity.
+					const strippedMessages = ToolCallingLoop.stripInternalToolCallIds(result.messages);
+					const rawEffort = this.request.modelConfiguration?.reasoningEffort;
+					const isSubagent = !!this.request.subAgentInvocationId;
+					this._lastModelCapabilities = {
+						enableThinking: !isAnthropicFamily(this.endpoint) || ToolCallingLoop.messagesContainThinking(strippedMessages),
+						reasoningEffort: typeof rawEffort === 'string' ? rawEffort : undefined,
+						enableToolSearch: !isSubagent && isAnthropicToolSearchEnabled(this.endpoint, this.configurationService),
+						enableContextEditing: !isSubagent && isAnthropicContextEditingEnabled(this.endpoint, this.configurationService, this.expService),
+					};
 				}
-			} else if (postRenderRatio >= 0.80 && (backgroundSummarizer.state === BackgroundSummarizationState.Idle || backgroundSummarizer.state === BackgroundSummarizationState.Failed)) {
-				// At ≥ 80% with no running compaction (or a previous failure) — kick off background work.
-				this._startBackgroundSummarization(backgroundSummarizer, props, token, postRenderRatio);
+				this._startBackgroundSummarization(backgroundSummarizer, result.messages, promptContext, props, token, postRenderRatio, useInlineSummarization);
 			}
 		}

@@ -841,52 +742,239 @@ export class AgentIntentInvocation extends EditCodeIntentInvocation implements I

 	private _startBackgroundSummarization(
 		backgroundSummarizer: BackgroundSummarizer,
+		mainRenderMessages: Raw.ChatMessage[],
+		promptContext: IBuildPromptContext,
 		props: AgentPromptProps,
 		token: vscode.CancellationToken,
 		contextRatio: number,
+		useInlineSummarization: boolean,
 	): void {
-		this.logService.debug(`[Agent] context at ${(contextRatio * 100).toFixed(0)}% — starting background compaction`);
-		// Deep-copy toolCallRounds and toolCallResults so the background render
-		// sees a frozen snapshot and doesn't drift as the main loop adds rounds.
-		const snapshotProps: AgentPromptProps = {
-			...props,
-			promptContext: {
-				...props.promptContext,
-				toolCallRounds: props.promptContext.toolCallRounds ? [...props.promptContext.toolCallRounds] : undefined,
-				toolCallResults: props.promptContext.toolCallResults ? { ...props.promptContext.toolCallResults } : undefined,
-			}
-		};
-		const bgRenderer = PromptRenderer.create(this.instantiationService, this.endpoint, this.prompt, {
-			...snapshotProps,
-			endpoint: this.endpoint,
-			promptContext: snapshotProps.promptContext,
-			triggerSummarize: true,
-			summarizationSource: 'background',
-		});
-		const bgProgress: vscode.Progress<vscode.ChatResponseReferencePart | vscode.ChatResponseProgressPart> = { report: () => { } };
+		this.logService.debug(`[ConversationHistorySummarizer] context at ${(contextRatio * 100).toFixed(0)}% — starting background compaction (inline=${useInlineSummarization})`);
+
 		const bgStartTime = Date.now();
+
+		// Snapshot rounds so the target round ID and telemetry reflect state at
+		// kick-off time, not completion time (the main loop mutates toolCallRounds).
+		// History is stable across a single user turn so a reference is sufficient.
+		const rounds = [...(promptContext.toolCallRounds ?? [])];
+		const history = promptContext.history;
+		let toolCallRoundId: string | undefined;
+		if (rounds.length >= 2) {
+			// Mark the round before the last, preserving the last round verbatim
+			toolCallRoundId = rounds[rounds.length - 2].id;
+		} else if (rounds.length === 1) {
+			toolCallRoundId = rounds[0].id;
+		} else {
+			for (let i = history.length - 1; i >= 0 && !toolCallRoundId; i--) {
+				const lastRound = history[i].rounds.at(-1);
+				if (lastRound) {
+					toolCallRoundId = lastRound.id;
+				}
+			}
+		}
+
+		// Build tool schemas matching the main agent loop so the prompt
+		// prefix (system + tools + messages) is identical for cache hits.
+		const availableTools = promptContext.tools?.availableTools;
+		const normalizedTools = availableTools?.length ? normalizeToolSchema(
+			this.endpoint.family,
+			availableTools.map(tool => ({
+				function: {
+					name: tool.name,
+					description: tool.description,
+					parameters: tool.inputSchema && Object.keys(tool.inputSchema).length ? tool.inputSchema : undefined
+				},
+				type: 'function' as const,
+			})),
+			(tool, rule) => {
+				this.logService.warn(`[ConversationHistorySummarizer] Tool ${tool} failed validation: ${rule}`);
+			},
+		) : undefined;
+		const toolOpts = normalizedTools?.length ? {
+			tools: normalizedTools,
+		} : undefined;
+
+		const associatedRequestId = promptContext.conversation?.getLatestTurn()?.id;
+		const conversationId = promptContext.conversation?.sessionId;
+		const modelCapabilities = this._lastModelCapabilities;
+
 		backgroundSummarizer.start(async bgToken => {
 			try {
-				const bgRenderResult = await bgRenderer.render(bgProgress, bgToken);
-				const summaryMetadata = bgRenderResult.metadata.get(SummarizedConversationHistoryMetadata);
-				if (!summaryMetadata) {
-					throw new Error('Background compaction produced no summary metadata');
+				if (useInlineSummarization) {
+					// Inline mode: fork the exact messages from the main render
+					// and append a summary user message. The prompt prefix is
+					// byte-identical to the main agent loop for cache hits.
+					const strippedMainMessages = ToolCallingLoop.stripInternalToolCallIds(mainRenderMessages);
+					const summaryMsgResult = await renderPromptElement(
+						this.instantiationService,
+						this.endpoint,
+						InlineSummarizationUserMessage,
+						{ endpoint: this.endpoint },
+						undefined,
+						bgToken,
+					);
+					const messages = [
+						...strippedMainMessages,
+						...summaryMsgResult.messages,
+					];
+
+					const response = await this.endpoint.makeChatRequest2({
+						debugName: 'summarizeConversationHistory-inline',
+						messages,
+						finishedCb: undefined,
+						location: ChatLocation.Agent,
+						conversationId,
+						requestOptions: {
+							temperature: 0,
+							stream: false,
+							...toolOpts,
+						},
+						modelCapabilities,
+						telemetryProperties: associatedRequestId ? { associatedRequestId } : undefined,
+						enableRetryOnFilter: true,
+					}, bgToken);
+					if (response.type !== ChatFetchResponseType.Success) {
+						throw new Error(`Background inline summarization request failed: ${response.type}`);
+					}
+					const summaryText = extractInlineSummary(response.value);
+					if (!summaryText) {
+						throw new Error('Background inline summarization: no <summary> tags found in response');
+					}
+					if (!toolCallRoundId) {
+						throw new Error('Background inline summarization: no round ID to apply summary to');
+					}
+					this.logService.debug(`[ConversationHistorySummarizer] background inline compaction completed (${summaryText.length} chars, roundId=${toolCallRoundId})`);
+
+					// Send summarizedConversationHistory telemetry for parity
+					// with the standard ConversationHistorySummarizer path.
+					const numRoundsInHistory = history.reduce((sum, t) => sum + t.rounds.length, 0);
+					const numRoundsInCurrentTurn = rounds.length;
+					const lastUsedTool = rounds.at(-1)?.toolCalls?.at(-1)?.name
+						?? history.at(-1)?.rounds.at(-1)?.toolCalls?.at(-1)?.name ?? 'none';
+					/* __GDPR__
+						"summarizedConversationHistory" : {
+							"owner": "bhavyau",
+							"comment": "Tracks background inline summarization outcome",
+							"outcome": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The success state." },
+							"model": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The model ID." },
+							"summarizationMode": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The summarization mode." },
+							"source": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Whether background or foreground." },
+							"conversationId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Session id." },
+							"chatRequestId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The chat request ID." },
+							"lastUsedTool": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The last tool used before summarization." },
+							"requestId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The request ID from the summarization call." },
+							"numRounds": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Total tool call rounds." },
+							"turnIndex": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "The index of the current turn." },
+							"curTurnRoundIndex": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "The index of the current round within the current turn." },
+							"isDuringToolCalling": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Whether this was triggered during tool calling." },
+							"duration": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Duration in ms." },
+							"promptTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "isMeasurement": true, "comment": "Prompt tokens." },
+							"promptCacheTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "isMeasurement": true, "comment": "Cached prompt tokens." },
+							"responseTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "isMeasurement": true, "comment": "Output tokens." }
+						}
+					*/
+					this.telemetryService.sendMSFTTelemetryEvent('summarizedConversationHistory', {
+						outcome: 'success',
+						model: this.endpoint.model,
+						summarizationMode: 'inline',
+						source: 'background',
+						conversationId,
+						chatRequestId: associatedRequestId,
+						lastUsedTool,
+						requestId: response.requestId,
+					}, {
+						numRounds: numRoundsInHistory + numRoundsInCurrentTurn,
+						turnIndex: history.length,
+						curTurnRoundIndex: numRoundsInCurrentTurn,
+						isDuringToolCalling: numRoundsInCurrentTurn > 0 ? 1 : 0,
+						duration: Date.now() - bgStartTime,
+						promptTokenCount: response.usage?.prompt_tokens,
+						promptCacheTokenCount: response.usage?.prompt_tokens_details?.cached_tokens,
+						responseTokenCount: response.usage?.completion_tokens,
+					});
+
+					return {
+						summary: summaryText,
+						toolCallRoundId,
+						promptTokens: response.usage?.prompt_tokens,
+						promptCacheTokens: response.usage?.prompt_tokens_details?.cached_tokens,
+						outputTokens: response.usage?.completion_tokens,
+						durationMs: Date.now() - bgStartTime,
+						model: this.endpoint.model,
+						summarizationMode: 'inline',
+						numRounds: undefined,
+						numRoundsSinceLastSummarization: undefined,
+					};
+				} else {
+					// Standard mode: use triggerSummarize which makes a separate
+					// LLM call with a summarization-specific prompt during render.
+					const snapshotProps: AgentPromptProps = {
+						...props,
+						promptContext: {
+							...promptContext,
+							toolCallRounds: promptContext.toolCallRounds ? [...promptContext.toolCallRounds] : undefined,
+							toolCallResults: promptContext.toolCallResults ? { ...promptContext.toolCallResults } : undefined,
+						}
+					};
+					const bgRenderer = PromptRenderer.create(this.instantiationService, this.endpoint, this.prompt, {
+						...snapshotProps,
+						endpoint: this.endpoint,
+						promptContext: snapshotProps.promptContext,
+						triggerSummarize: true,
+						summarizationSource: 'background',
+					});
+					const bgProgress: vscode.Progress<vscode.ChatResponseReferencePart | vscode.ChatResponseProgressPart> = { report: () => { } };
+					const bgRenderResult = await bgRenderer.render(bgProgress, bgToken);
+					const summaryMetadata = bgRenderResult.metadata.get(SummarizedConversationHistoryMetadata);
+					if (!summaryMetadata) {
+						throw new Error('Background compaction produced no summary metadata');
+					}
+					this.logService.debug(`[ConversationHistorySummarizer] background compaction completed successfully (roundId=${summaryMetadata.toolCallRoundId})`);
+					return {
+						summary: summaryMetadata.text,
+						toolCallRoundId: summaryMetadata.toolCallRoundId,
+						promptTokens: summaryMetadata.usage?.prompt_tokens,
+						promptCacheTokens: summaryMetadata.usage?.prompt_tokens_details?.cached_tokens,
+						outputTokens: summaryMetadata.usage?.completion_tokens,
+						durationMs: Date.now() - bgStartTime,
+						model: summaryMetadata.model,
+						summarizationMode: summaryMetadata.summarizationMode,
+						numRounds: summaryMetadata.numRounds,
+						numRoundsSinceLastSummarization: summaryMetadata.numRoundsSinceLastSummarization,
+					};
 				}
-				this.logService.debug(`[Agent] background compaction completed successfully (roundId=${summaryMetadata.toolCallRoundId})`);
-				return {
-					summary: summaryMetadata.text,
-					toolCallRoundId: summaryMetadata.toolCallRoundId,
-					promptTokens: summaryMetadata.usage?.prompt_tokens,
-					promptCacheTokens: summaryMetadata.usage?.prompt_tokens_details?.cached_tokens,
-					outputTokens: summaryMetadata.usage?.completion_tokens,
-					durationMs: Date.now() - bgStartTime,
-					model: summaryMetadata.model,
-					summarizationMode: summaryMetadata.summarizationMode,
-					numRounds: summaryMetadata.numRounds,
-					numRoundsSinceLastSummarization: summaryMetadata.numRoundsSinceLastSummarization,
-				};
 			} catch (err) {
-				this.logService.error(err, `[Agent] background compaction failed`);
+				this.logService.error(err, `[ConversationHistorySummarizer] background compaction failed`);
+
+				// Send failure telemetry for inline background summarization
+				if (useInlineSummarization) {
+					/* __GDPR__
+						"summarizedConversationHistory" : {
+							"owner": "bhavyau",
+							"comment": "Tracks background inline summarization failure",
+							"outcome": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The success state." },
+							"detailedOutcome": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Detailed failure reason." },
+							"model": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The model ID." },
+							"summarizationMode": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The summarization mode." },
+							"source": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Whether background or foreground." },
+							"conversationId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Session id." },
+							"chatRequestId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The chat request ID." },
+							"duration": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Duration in ms." }
+						}
+					*/
+					this.telemetryService.sendMSFTTelemetryEvent('summarizedConversationHistory', {
+						outcome: 'failed',
+						detailedOutcome: err instanceof Error ? err.message : String(err),
+						model: this.endpoint.model,
+						summarizationMode: 'inline',
+						source: 'background',
+						conversationId,
+						chatRequestId: associatedRequestId,
+					}, {
+						duration: Date.now() - bgStartTime,
+					});
+				}
+
 				throw err;
 			}
 		}, token);
@@ -924,7 +1012,7 @@ export class AgentIntentInvocation extends EditCodeIntentInvocation implements I
 				}
 			}
 			if (!found) {
-				this.logService.warn(`[Agent] background compaction round ${bgResult.toolCallRoundId} not found in toolCallRounds or history — summary dropped`);
+				this.logService.warn(`[ConversationHistorySummarizer] background compaction round ${bgResult.toolCallRoundId} not found in toolCallRounds or history — summary dropped`);
 			}
 		}
 		// Invalidate the auto mode router cache so the next getChatEndpoint()
@@ -48,7 +48,7 @@ import { ThinkingDataItem, ToolCallRound } from '../../prompt/common/toolCallRou
 import { IBuildPromptResult, IResponseProcessor } from '../../prompt/node/intents';
 import { PseudoStopStartResponseProcessor } from '../../prompt/node/pseudoStartStopConversationCallback';
 import { ResponseProcessorContext } from '../../prompt/node/responseProcessorContext';
-import { extractInlineSummary, InlineSummarizationRequestedMetadata, SummarizedConversationHistoryMetadata } from '../../prompts/node/agent/summarizedConversationHistory';
+import { SummarizedConversationHistoryMetadata } from '../../prompts/node/agent/summarizedConversationHistory';
 import { ToolFailureEncountered, ToolResultMetadata } from '../../prompts/node/panel/toolCalling';
 import { ToolName } from '../../tools/common/toolNames';
 import { IToolsService, ToolCallCancelledError } from '../../tools/common/toolsService';
@@ -355,9 +355,6 @@ export abstract class ToolCallingLoop<TOptions extends IToolCallingLoopOptions =
 	private taskCompleted = false;
 	private autopilotStopHookActive = false;
 	private autopilotProgressDeferred: DeferredPromise<void> | undefined;
-	private inlineSummarizationProgressDeferred: DeferredPromise<void> | undefined;
-	/** Set to true before calling fetch() when the current iteration is an inline summarization request. */
-	protected _isInlineSummarizationRequest = false;

 	/**
 	 * Autopilot stop hook — the model needs to call `task_complete` to signal it's done.
@@ -913,145 +910,6 @@ export abstract class ToolCallingLoop<TOptions extends IToolCallingLoopOptions =
 				this._sessionTranscriptService.logAssistantTurnEnd(sessionId, turnId);
 				agentSpan?.addEvent('turn_end', { turnId, ...(chatSessionId ? { [CopilotChatAttr.CHAT_SESSION_ID]: chatSessionId } : {}) });

-				// Inline summarization: the model responded with summary text only (no tool calls).
-				// Extract the summary, store it on the appropriate round, and continue the loop.
-				if (result.inlineSummarizationRequested && !result.round.toolCalls.length) {
-					if (result.response.type !== ChatFetchResponseType.Success) {
-						this.inlineSummarizationProgressDeferred?.complete(undefined);
-						this.inlineSummarizationProgressDeferred = undefined;
-					} else {
-						const summaryText = extractInlineSummary(result.round.response);
-						if (summaryText !== undefined) {
-							const summarizedRound = this.applySummaryToRound(summaryText);
-
-							if (summarizedRound) {
-								// Persist summary on the turn so normalizeSummariesOnRounds can restore it
-								const turn = this.turn;
-								const resolvedModel = result.response.resolvedModel;
-								const usage = result.response.usage;
-								turn.addPendingSummary(summarizedRound, summaryText);
-
-								const history = this.options.conversation.turns.slice(0, -1);
-								// Exclude the summarization round from telemetry counts for parity with separate-call summarization
-								const toolCallRoundsForTelemetry = this.toolCallRounds.slice(0, -1);
-								const numRoundsInHistory = history.reduce((sum, t) => sum + t.rounds.length, 0);
-								const numRoundsInCurrentTurn = toolCallRoundsForTelemetry.length;
-								const numRounds = numRoundsInHistory + numRoundsInCurrentTurn;
-								const lastUsedTool = toolCallRoundsForTelemetry.at(-1)?.toolCalls.at(-1)?.name
-									?? history.at(-1)?.rounds.at(-1)?.toolCalls.at(-1)?.name ?? 'none';
-
-								// Compute rounds since last summarization (same logic as ConversationHistorySummarizer)
-								let numRoundsSinceLastSummarization = -1;
-								for (let ri = toolCallRoundsForTelemetry.length - 1; ri >= 0; ri--) {
-									if (toolCallRoundsForTelemetry[ri].summary) {
-										numRoundsSinceLastSummarization = toolCallRoundsForTelemetry.length - 1 - ri;
-										break;
-									}
-								}
-								if (numRoundsSinceLastSummarization === -1) {
-									let count = numRoundsInCurrentTurn;
-									outerLoop: for (let ti = history.length - 1; ti >= 0; ti--) {
-										for (let ri = history[ti].rounds.length - 1; ri >= 0; ri--) {
-											if (history[ti].rounds[ri].summary) {
-												numRoundsSinceLastSummarization = count;
-												break outerLoop;
-											}
-											count++;
-										}
-									}
-								}
-
-								const inlineSummarizationMeta = new SummarizedConversationHistoryMetadata(
-									summarizedRound,
-									summaryText,
-									{
-										usage,
-										model: resolvedModel,
-										summarizationMode: 'inline',
-										numRounds,
-										numRoundsSinceLastSummarization,
-										source: 'foreground',
-										outcome: 'success',
-									},
-								);
-								turn.setMetadata(inlineSummarizationMeta);
-
-								// Fire telemetry matching the existing summarizedConversationHistory event
-								/* __GDPR__
-									"summarizedConversationHistory" : {
-										"owner": "bhavyau",
-										"comment": "Tracks inline summarization",
-										"outcome": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The success state." },
-										"model": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The model ID." },
-										"summarizationMode": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The summarization mode." },
-										"source": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Whether background or foreground." },
-										"conversationId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Session id." },
-										"chatRequestId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The chat request ID." },
-										"lastUsedTool": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The last tool used before summarization." },
-										"requestId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The request ID from the summarization call." },
-										"numRounds": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Total tool call rounds." },
-										"numRoundsSinceLastSummarization": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Rounds since last summarization." },
-										"turnIndex": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "The index of the current turn." },
-										"curTurnRoundIndex": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "The index of the current round within the current turn." },
-										"isDuringToolCalling": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Whether this was triggered during tool calling." },
-										"promptTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "isMeasurement": true, "comment": "Prompt tokens." },
-										"promptCacheTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "isMeasurement": true, "comment": "Cached prompt tokens." },
-										"responseTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "isMeasurement": true, "comment": "Output tokens." }
-									}
-								*/
-								this._telemetryService.sendMSFTTelemetryEvent('summarizedConversationHistory', {
-									outcome: 'success',
-									model: resolvedModel,
-									summarizationMode: 'inline',
-									source: 'foreground',
-									conversationId: this.options.conversation.sessionId,
-									chatRequestId: turn.id,
-									lastUsedTool,
-									requestId: result.response.requestId,
-								}, {
-									numRounds,
-									numRoundsSinceLastSummarization,
-									turnIndex: history.length,
-									curTurnRoundIndex: numRoundsInCurrentTurn,
-									isDuringToolCalling: numRoundsInCurrentTurn > 0 ? 1 : 0,
-									promptTokenCount: usage?.prompt_tokens,
-									promptCacheTokenCount: usage?.prompt_tokens_details?.cached_tokens,
-									responseTokenCount: usage?.completion_tokens,
-								});
-								GenAiMetrics.incrementAgentSummarizationCount(this._otelService, 'success');
-
-								this._logService.info(`[ToolCallingLoop] Inline summarization extracted (${summaryText.length} chars, roundId=${summarizedRound}), continuing loop`);
-
-								// Remove the summarization round — it served its purpose
-								// and shouldn't be rendered as an assistant message in
-								// subsequent iterations (otherwise the model sees both
-								// the compacted <conversation-summary> AND the raw
-								// <analysis>...<summary>...</summary> response).
-								this.toolCallRounds.pop();
-
-								// Resolve the "Compacting conversation..." progress to show "Compacted conversation"
-								this.inlineSummarizationProgressDeferred?.complete(undefined);
-								this.inlineSummarizationProgressDeferred = undefined;
-								continue;
-							} else {
-								this._logService.warn(`[ToolCallingLoop] Inline summarization: no round found to store summary on`);
-								this._sendInlineSummarizationFailureTelemetry('noRoundFound', result.response);
-								GenAiMetrics.incrementAgentSummarizationCount(this._otelService, 'failed');
-								this.inlineSummarizationProgressDeferred?.complete(undefined);
-								this.inlineSummarizationProgressDeferred = undefined;
-								// Fall through to normal no-tool-calls handling (will break the loop)
-							}
-						} else {
-							this._logService.warn(`[ToolCallingLoop] Inline summarization requested but no summary extracted from response`);
-							this._sendInlineSummarizationFailureTelemetry('extractionFailed', result.response);
-							GenAiMetrics.incrementAgentSummarizationCount(this._otelService, 'failed');
-							this.inlineSummarizationProgressDeferred?.complete(undefined);
-							this.inlineSummarizationProgressDeferred = undefined;
-							// Fall through to normal no-tool-calls handling (will break the loop)
-						}
-					}
-				}
-
 				// If the model produced productive (non-task_complete) tool calls after being nudged,
 				// reset the stop hook flag and iteration count so it can be nudged again.
 				if (this.autopilotStopHookActive && result.round.toolCalls.length && !result.round.toolCalls.some(tc => tc.name === ToolCallingLoop.TASK_COMPLETE_TOOL_NAME)) {
@@ -1133,8 +991,6 @@ export abstract class ToolCallingLoop<TOptions extends IToolCallingLoopOptions =
 					break;
 				}
 			} catch (e) {
-				this.inlineSummarizationProgressDeferred?.complete(undefined);
-				this.inlineSummarizationProgressDeferred = undefined;
 				if (isCancellationError(e) && lastResult) {
 					break;
 				}
@@ -1332,19 +1188,6 @@ export abstract class ToolCallingLoop<TOptions extends IToolCallingLoopOptions =
 		if (conversationSummary) {
 			this.turn.setMetadata(conversationSummary);
 		}
-		const inlineSummarizationRequested = !!effectiveBuildPromptResult.metadata.get(InlineSummarizationRequestedMetadata);
-
-		// Show "Compacting conversation..." progress during the inline summarization
-		// fetch. The deferred is resolved in _runLoop after the summary is extracted.
-		if (inlineSummarizationRequested) {
-			this.inlineSummarizationProgressDeferred?.complete(undefined);
-			const deferred = new DeferredPromise<void>();
-			this.inlineSummarizationProgressDeferred = deferred;
-			outputStream?.progress(l10n.t('Compacting conversation...'), async () => {
-				await deferred.p;
-				return l10n.t('Compacted conversation');
-			});
-		}

 		const endpoint = await this._endpointProvider.getChatEndpoint(this.options.request);
 		const tokenizer = endpoint.acquireTokenizer();
@@ -1381,14 +1224,11 @@ export abstract class ToolCallingLoop<TOptions extends IToolCallingLoopOptions =

 		this._logService.trace('Sending prompt to model');

-		// When inline summarization is requested, suppress streaming so the
-		// summary text (with <summary> tags) is not shown to the user.
-		const effectiveOutputStream = inlineSummarizationRequested ? undefined : outputStream;
-		const streamParticipants = effectiveOutputStream ? [effectiveOutputStream] : [];
+		const streamParticipants = outputStream ? [outputStream] : [];
 		let fetchStreamSource: FetchStreamSource | undefined;
 		let processResponsePromise: Promise<ChatResult | void> | undefined;
 		let stopEarly = false;
-		if (effectiveOutputStream) {
+		if (outputStream) {
 			this.options.streamParticipants?.forEach(fn => {
 				streamParticipants.push(fn(streamParticipants[streamParticipants.length - 1]));
 			});
@@ -1428,7 +1268,6 @@ export abstract class ToolCallingLoop<TOptions extends IToolCallingLoopOptions =
 		const enableThinking = !shouldDisableThinking;
 		let phase: string | undefined;
 		let compaction: OpenAIContextManagementResponse | undefined;
-		this._isInlineSummarizationRequest = inlineSummarizationRequested;
 		markChatExt(this.options.conversation.sessionId, ChatExtPerfMark.WillFetch);
 		const fetchResult = await this.fetch({
 			messages: this.applyMessagePostProcessing(effectiveBuildPromptResult.messages, { stripOrphanedToolCalls: isGeminiFamily(endpoint) }),
@@ -1559,7 +1398,6 @@ export abstract class ToolCallingLoop<TOptions extends IToolCallingLoopOptions =
 				hadIgnoredFiles: buildPromptResult.hasIgnoredFiles,
 				lastRequestMessages: effectiveBuildPromptResult.messages,
 				availableTools,
-				inlineSummarizationRequested,
 			};
 		}

@@ -1569,7 +1407,6 @@ export abstract class ToolCallingLoop<TOptions extends IToolCallingLoopOptions =
 			lastRequestMessages: effectiveBuildPromptResult.messages,
 			availableTools,
 			round: new ToolCallRound('', toolCalls, toolInputRetry),
-			inlineSummarizationRequested,
 		};
 	}

@@ -1582,84 +1419,6 @@ export abstract class ToolCallingLoop<TOptions extends IToolCallingLoopOptions =
 		return toolCallId + `__vscode-${ToolCallingLoop.NextToolCallId++}`;
 	}

-	/**
-	 * Finds the appropriate round and applies the summary text to it.
-	 *
-	 * After the summary round is pushed, `toolCallRounds` looks like:
-	 *   [r0, ..., rK, summaryRound]
-	 *
-	 * We want to keep the last real tool-call round (rK) verbatim so the model
-	 * retains context of its most recent actions. The summary replaces everything
-	 * before rK.
-	 *
-	 * @returns The round ID that was marked with the summary, or `undefined` if
-	 *          no suitable round was found.
-	 */
-	private applySummaryToRound(summaryText: string): string | undefined {
-		const rounds = this.toolCallRounds;
-		if (rounds.length > 2) {
-			// 3+ rounds: mark the one before the last real round, preserving rK verbatim
-			rounds[rounds.length - 3].summary = summaryText;
-			return rounds[rounds.length - 3].id;
-		} else if (rounds.length > 1) {
-			// 2 rounds (one real + summaryRound): mark the real round
-			rounds[rounds.length - 2].summary = summaryText;
-			return rounds[rounds.length - 2].id;
-		}
-		return undefined;
-	}
-
-	/**
-	 * Fires a `summarizedConversationHistory` telemetry event for inline summarization failures,
-	 * matching the format of the existing `ConversationHistorySummarizer.sendSummarizationTelemetry()`.
-	 */
-	private _sendInlineSummarizationFailureTelemetry(detailedOutcome: string, response: ChatResponse): void {
-		const history = this.options.conversation.turns.slice(0, -1);
-		const numRoundsInHistory = history.reduce((sum, t) => sum + t.rounds.length, 0);
-		const numRoundsInCurrentTurn = this.toolCallRounds.length;
-		const resolvedModel = response.type === ChatFetchResponseType.Success ? response.resolvedModel : undefined;
-		const requestId = response.type === ChatFetchResponseType.Success ? response.requestId : '';
-		const usage = response.type === ChatFetchResponseType.Success ? response.usage : undefined;
-
-		/* __GDPR__
-			"summarizedConversationHistory" : {
-				"owner": "bhavyau",
-				"comment": "Tracks inline summarization failure",
-				"outcome": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The success state." },
-				"detailedOutcome": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Detailed failure reason." },
-				"model": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The model ID." },
-				"summarizationMode": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The summarization mode." },
-				"source": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Whether background or foreground." },
-				"conversationId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Session id." },
-				"chatRequestId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The chat request ID." },
-				"requestId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The request ID from the summarization call." },
-				"numRounds": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Total tool call rounds." },
-				"turnIndex": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "The index of the current turn." },
-				"curTurnRoundIndex": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "The index of the current round within the current turn." },
-				"promptTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "isMeasurement": true, "comment": "Prompt tokens." },
-				"promptCacheTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "isMeasurement": true, "comment": "Cached prompt tokens." },
-				"responseTokenCount": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "isMeasurement": true, "comment": "Output tokens." }
-			}
-		*/
-		this._telemetryService.sendMSFTTelemetryEvent('summarizedConversationHistory', {
-			outcome: 'failed',
-			detailedOutcome,
-			model: resolvedModel,
-			summarizationMode: 'inline',
-			source: 'foreground',
-			conversationId: this.options.conversation.sessionId,
-			chatRequestId: this.turn.id,
-			requestId,
-		}, {
-			numRounds: numRoundsInHistory + numRoundsInCurrentTurn,
-			turnIndex: history.length,
-			curTurnRoundIndex: numRoundsInCurrentTurn,
-			promptTokenCount: usage?.prompt_tokens,
-			promptCacheTokenCount: usage?.prompt_tokens_details?.cached_tokens,
-			responseTokenCount: usage?.completion_tokens,
-		});
-	}
-
 	private applyMessagePostProcessing(messages: Raw.ChatMessage[], options?: { stripOrphanedToolCalls?: boolean }): Raw.ChatMessage[] {
 		return this.validateToolMessages(
 			ToolCallingLoop.stripInternalToolCallIds(messages), options);
@@ -1879,8 +1638,6 @@ export interface IToolCallSingleResult {
 	hadIgnoredFiles: boolean;
 	lastRequestMessages: Raw.ChatMessage[];
 	availableTools: readonly LanguageModelToolInformation[];
-	/** Set when the prompt included inline summarization instructions. */
-	inlineSummarizationRequested?: boolean;
 }

 export interface IToolCallLoopResult extends IToolCallSingleResult {
@@ -687,10 +687,9 @@ class DefaultToolCallingLoop extends ToolCallingLoop<IDefaultToolLoopOptions> {

 	protected override async fetch(opts: ToolCallingLoopFetchOptions, token: CancellationToken): Promise<ChatResponse> {
 		const messageSourcePrefix = this.options.location === ChatLocation.Editor ? 'inline' : 'chat';
-		const baseDebugName = this.options.request.subAgentInvocationId ?
+		const debugName = this.options.request.subAgentInvocationId ?
 			`tool/runSubagent${this.options.request.subAgentName ? `-${this.options.request.subAgentName}` : ''}` :
 			`${ChatLocation.toStringShorter(this.options.location)}/${this.options.intent?.id}`;
-		const debugName = this._isInlineSummarizationRequest ? 'inlineSummarizeConversationHistory-full' : baseDebugName;
 		const location = this.options.overrideRequestLocation ?? this.options.location;
 		const isThinkingLocation = location === ChatLocation.Agent || location === ChatLocation.MessagesProxy;
 		const rawEffort = this.options.request.modelConfiguration?.reasoningEffort;
@@ -56,14 +56,6 @@ export interface AgentPromptProps extends GenericBasePromptElementProps {

 	readonly triggerSummarize?: boolean;

-	/**
-	 * When true, appends a summarization instruction as a user message in the
-	 * current agent loop iteration instead of making a separate LLM call.
-	 * The model outputs ONLY a summary (no tool calls) and the loop continues
-	 * with the compacted history on the next iteration.
-	 */
-	readonly inlineSummarization?: boolean;
-
 	/**
 	 * Enables cache breakpoints and summarization
 	 */
@@ -151,7 +143,6 @@ export class AgentPrompt extends PromptElement<AgentPromptProps> {
 				<SummarizedConversationHistory
 					flexGrow={1}
 					triggerSummarize={this.props.triggerSummarize}
-					inlineSummarization={this.props.inlineSummarization}
 					priority={900}
 					promptContext={this.props.promptContext}
 					location={this.props.location}
@@ -18,7 +18,6 @@ import { CUSTOM_TOOL_SEARCH_NAME } from '../../../../platform/networking/common/
 import { IChatEndpoint } from '../../../../platform/networking/common/networking';
 import { APIUsage } from '../../../../platform/networking/common/openai';
 import { IPromptPathRepresentationService } from '../../../../platform/prompts/common/promptPathRepresentationService';
-import { IExperimentationService } from '../../../../platform/telemetry/common/nullExperimentationService';
 import { ITelemetryService } from '../../../../platform/telemetry/common/telemetry';
 import { ThinkingData } from '../../../../platform/thinking/common/thinking';
 import { computePromptTokenDetails } from '../../../../platform/tokenizer/node/promptTokenDetails';
@@ -398,8 +397,6 @@ export interface SummarizedAgentHistoryProps extends BasePromptElementProps, Age
 	readonly location: ChatLocation;
 	readonly promptContext: IBuildPromptContext;
 	readonly triggerSummarize?: boolean;
-	/** When true, appends a summarization instruction in the agent loop instead of a separate LLM call. */
-	readonly inlineSummarization?: boolean;
 	readonly tools?: ReadonlyArray<LanguageModelToolInformation> | undefined;
 	readonly enableCacheBreakpoints?: boolean;
 	readonly workingNotebook?: NotebookDocument;
@@ -420,8 +417,6 @@ export class SummarizedConversationHistory extends PromptElement<SummarizedAgent
 		props: SummarizedAgentHistoryProps,
 		@IInstantiationService private readonly instantiationService: IInstantiationService,
 		@ISessionTranscriptService private readonly sessionTranscriptService: ISessionTranscriptService,
-		@IConfigurationService private readonly configurationService: IConfigurationService,
-		@IExperimentationService private readonly experimentationService: IExperimentationService,
 	) {
 		super(props);
 	}
@@ -429,12 +424,10 @@ export class SummarizedConversationHistory extends PromptElement<SummarizedAgent
 	override async render(state: void, sizing: PromptSizing, progress: Progress<ChatResponsePart> | undefined, token: CancellationToken | undefined) {
 		const promptContext = { ...this.props.promptContext };
 		let historyMetadata: SummarizedConversationHistoryMetadata | undefined;
-		const transcriptLookupEnabled = this.configurationService.getExperimentBasedConfig(ConfigKey.ConversationTranscriptLookup, this.experimentationService);
-
 		// Resolve transcript path and flush to disk so the model can read the up-to-date file
 		let transcriptPath: string | undefined;
 		const sessionId = this.props.promptContext.conversation?.sessionId;
-		if (transcriptLookupEnabled && sessionId) {
+		if (sessionId) {
 			// Lazily start the transcript session now (before summarization) so it
 			// captures the full pre-compaction conversation. startSession is
 			// idempotent — if hooks already started it, this is a no-op.
@@ -479,18 +472,12 @@ export class SummarizedConversationHistory extends PromptElement<SummarizedAgent
 			}
 		}

-		// Inline summarization: append instruction as a user message in the agent loop
-		// instead of making a separate LLM call. The model outputs only a summary.
-		const inlineSummarizationRequested = this.props.inlineSummarization && !this.props.triggerSummarize;
-
 		return <>
 			{historyMetadata && <meta value={historyMetadata} />}
-			{inlineSummarizationRequested && <meta value={new InlineSummarizationRequestedMetadata()} />}
 			<ConversationHistory
 				{...this.props}
 				promptContext={promptContext}
 				enableCacheBreakpoints={this.props.enableCacheBreakpoints} />
-			{inlineSummarizationRequested && <InlineSummarizationUserMessage priority={1000} endpoint={this.props.endpoint} />}
 		</>;
 	}

@@ -687,7 +674,7 @@ class ConversationHistorySummarizer {
 			const budgetExceeded = e instanceof BudgetExceededError;
 			const outcome = budgetExceeded ? 'budget_exceeded' : 'renderError';
 			this.logInfo(`Error rendering summarization prompt in mode: ${mode}. ${e.stack}`, mode);
-			this.sendSummarizationTelemetry(outcome, '', this.props.endpoint.model, mode, stopwatch.elapsed(), undefined);
+			this.sendSummarizationTelemetry(outcome, '', this.props.endpoint.model, mode, stopwatch.elapsed(), undefined, e instanceof Error ? e.message : String(e));
 			throw e;
 		}

@@ -704,7 +691,7 @@ class ConversationHistorySummarizer {
 					}, type: 'function'
 				})),
 				(tool, rule) => {
-					this.logService.warn(`Tool ${tool} failed validation: ${rule}`);
+					this.logService.warn(`[ConversationHistorySummarizer] Tool ${tool} failed validation: ${rule}`);
 				},
 			) : undefined;
 			const toolOpts = normalizedTools?.length ? {
@@ -766,7 +753,7 @@ class ConversationHistorySummarizer {
 			}, this.token ?? CancellationToken.None);
 		} catch (e) {
 			this.logInfo(`Error from summarization request. ${e.message}`, mode);
-			this.sendSummarizationTelemetry('requestThrow', '', this.props.endpoint.model, mode, stopwatch.elapsed(), undefined);
+			this.sendSummarizationTelemetry('requestThrow', '', this.props.endpoint.model, mode, stopwatch.elapsed(), undefined, e instanceof Error ? e.message : String(e));
 			throw e;
 		}

@@ -806,7 +793,7 @@ class ConversationHistorySummarizer {
 				? Math.min(this.sizing.tokenBudget, this.props.maxSummaryTokens)
 				: this.sizing.tokenBudget;
 		if (summarySize > effectiveBudget) {
-			this.sendSummarizationTelemetry('too_large', response.requestId, this.props.endpoint.model, mode, elapsedTime, response.usage);
+			this.sendSummarizationTelemetry('too_large', response.requestId, this.props.endpoint.model, mode, elapsedTime, response.usage, `${summarySize} tokens exceeds budget ${effectiveBudget}`);
 			this.logInfo(`Summary too large: ${summarySize} tokens (effective budget ${effectiveBudget})`, mode);
 			throw new Error('Summary too large');
 		}
@@ -1072,14 +1059,7 @@ class SummaryMessageElement extends PromptElement<SummaryMessageProps> {
 	}
 }

-/**
- * Metadata flag indicating that inline summarization was requested in this render.
- * The caller (agentIntent) checks for this to know the model response should
- * contain only a summary.
- */
-export class InlineSummarizationRequestedMetadata extends PromptMetadata { }
-
-interface InlineSummarizationUserMessageProps extends BasePromptElementProps {
+export interface InlineSummarizationUserMessageProps extends BasePromptElementProps {
 	readonly endpoint: IChatEndpoint;
 }

@@ -1089,7 +1069,7 @@ interface InlineSummarizationUserMessageProps extends BasePromptElementProps {
 * no tool calls. The summary is extracted from the response and stored on the round
 * for the next iteration.
 */
-class InlineSummarizationUserMessage extends PromptElement<InlineSummarizationUserMessageProps> {
+export class InlineSummarizationUserMessage extends PromptElement<InlineSummarizationUserMessageProps> {
 	override async render(state: void, sizing: PromptSizing) {
 		const isOpus = this.props.endpoint.model.startsWith('claude-opus');
 		return <UserMessage priority={1000}>
@@ -30,7 +30,7 @@ import { ToolName } from '../../../../tools/common/toolNames';
 import { PromptRenderer } from '../../base/promptRenderer';
 import { AgentPrompt, AgentPromptProps } from '../agentPrompt';
 import { PromptRegistry } from '../promptRegistry';
-import { ConversationHistorySummarizationPrompt, extractInlineSummary, InlineSummarizationRequestedMetadata, stripToolSearchMessages, SummarizedConversationHistory, SummarizedConversationHistoryMetadata, SummarizedConversationHistoryPropsBuilder } from '../summarizedConversationHistory';
+import { ConversationHistorySummarizationPrompt, extractInlineSummary, stripToolSearchMessages, SummarizedConversationHistory, SummarizedConversationHistoryMetadata, SummarizedConversationHistoryPropsBuilder } from '../summarizedConversationHistory';

 suite('Agent Summarization', () => {
 	let accessor: ITestingServicesAccessor;
@@ -582,162 +582,6 @@ suite('extractInlineSummary', () => {
 	});
 });

-suite('Inline Summarization Prompt', () => {
-	let accessor: ITestingServicesAccessor;
-
-	beforeAll(() => {
-		const services = createExtensionUnitTestingServices();
-		services.define(IWorkspaceService, new SyncDescriptor(
-			TestWorkspaceService,
-			[
-				[URI.file('/workspace')],
-				[]
-			]
-		));
-		services.define(IChatMLFetcher, new StaticChatMLFetcher([]));
-		accessor = services.createTestingAccessor();
-	});
-
-	afterAll(() => {
-		accessor.dispose();
-	});
-
-	test('inlineSummarization=true appends summarization user message and metadata', async () => {
-		const instaService = accessor.get(IInstantiationService);
-		const endpoint = instaService.createInstance(MockEndpoint, undefined);
-		const turn = new Turn('turnId', { type: 'user', message: 'hello' });
-		const conversation = new Conversation('sessionId', [turn]);
-
-		const firstTurn = new Turn('id1', { type: 'user', message: 'previous turn message' });
-		firstTurn.setResponse(TurnStatus.Success, { type: 'user', message: 'response' }, 'responseId', {
-			metadata: {
-				toolCallRounds: [
-					new ToolCallRound('ok', [{
-						id: 'tooluse_1',
-						name: ToolName.EditFile,
-						arguments: JSON.stringify({ filePath: '/workspace/file.ts', code: 'test' })
-					}]),
-				]
-			}
-		} as ICopilotChatResultIn);
-
-		const promptContext: IBuildPromptContext = {
-			chatVariables: new ChatVariablesCollection([]),
-			history: [firstTurn],
-			query: 'continue',
-			toolCallRounds: [
-				new ToolCallRound('ok 2', [{
-					id: 'tooluse_2',
-					name: ToolName.EditFile,
-					arguments: JSON.stringify({ filePath: '/workspace/file.ts', code: 'test2' })
-				}]),
-			],
-			toolCallResults: {
-				'tooluse_2': new LanguageModelToolResult([new LanguageModelTextPart('success')]),
-			},
-			tools: {
-				availableTools: [],
-				toolInvocationToken: null as never,
-				toolReferences: [],
-			},
-			conversation,
-		};
-
-		const customizations = await PromptRegistry.resolveAllCustomizations(instaService, endpoint);
-		const props: AgentPromptProps = {
-			priority: 1,
-			endpoint,
-			location: ChatLocation.Panel,
-			promptContext,
-			enableCacheBreakpoints: true,
-			inlineSummarization: true,
-			customizations,
-		};
-
-		const renderer = PromptRenderer.create(instaService, endpoint, AgentPrompt, props);
-		const result = await renderer.render();
-
-		// Should have InlineSummarizationRequestedMetadata set
-		const inlineMeta = result.metadata.get(InlineSummarizationRequestedMetadata);
-		expect(inlineMeta).toBeDefined();
-
-		// The last user message should contain summarization instructions
-		const userMessages = result.messages.filter(m => m.role === Raw.ChatRole.User);
-		const lastUserMessage = userMessages[userMessages.length - 1];
-		const lastMessageText = lastUserMessage.content.map(c => 'text' in c ? c.text : '').join('');
-		expect(lastMessageText).toContain('summary');
-		expect(lastMessageText).toContain('Do NOT call any tools');
-
-		// Should NOT have the separate-call summarization metadata
-		const summaryMeta = result.metadata.get(SummarizedConversationHistoryMetadata);
-		expect(summaryMeta).toBeUndefined();
-	});
-
-	test('inlineSummarization=true sets metadata when triggerSummarize is false', async () => {
-		const instaService = accessor.get(IInstantiationService);
-		const endpoint = instaService.createInstance(MockEndpoint, undefined);
-
-		const firstTurn = new Turn('id1', { type: 'user', message: 'previous turn message' });
-		firstTurn.setResponse(TurnStatus.Success, { type: 'user', message: 'response' }, 'responseId', {
-			metadata: {
-				toolCallRounds: [
-					new ToolCallRound('ok', [{
-						id: 'tooluse_1',
-						name: ToolName.EditFile,
-						arguments: JSON.stringify({ filePath: '/workspace/file.ts', code: 'test' })
-					}]),
-				]
-			}
-		} as ICopilotChatResultIn);
-
-		const promptContext: IBuildPromptContext = {
-			chatVariables: new ChatVariablesCollection([]),
-			history: [firstTurn],
-			query: 'continue',
-			toolCallRounds: [
-				new ToolCallRound('ok 2', [{
-					id: 'tooluse_2',
-					name: ToolName.EditFile,
-					arguments: JSON.stringify({ filePath: '/workspace/file.ts', code: 'test2' })
-				}]),
-			],
-			toolCallResults: {
-				'tooluse_2': new LanguageModelToolResult([new LanguageModelTextPart('success')]),
-			},
-			tools: {
-				availableTools: [],
-				toolInvocationToken: null as never,
-				toolReferences: [],
-			},
-		};
-
-		// When both triggerSummarize and inlineSummarization are true,
-		// triggerSummarize should take precedence (inlineSummarization condition
-		// requires triggerSummarize to be false).
-		// We test this indirectly: inlineSummarization=true with triggerSummarize=false
-		// should set InlineSummarizationRequestedMetadata, but if triggerSummarize were
-		// also true, the inline path would be skipped.
-		const customizations = await PromptRegistry.resolveAllCustomizations(instaService, endpoint);
-		const propsInlineOnly: AgentPromptProps = {
-			priority: 1,
-			endpoint,
-			location: ChatLocation.Panel,
-			promptContext,
-			enableCacheBreakpoints: true,
-			triggerSummarize: false,
-			inlineSummarization: true,
-			customizations,
-		};
-
-		const renderer = PromptRenderer.create(instaService, endpoint, AgentPrompt, propsInlineOnly);
-		const result = await renderer.render();
-
-		// Inline metadata should be set when triggerSummarize is false
-		const inlineMeta = result.metadata.get(InlineSummarizationRequestedMetadata);
-		expect(inlineMeta).toBeDefined();
-	});
-});
-
 suite('stripToolSearchMessages', () => {
 	function makeAssistantMessage(toolCalls: { id: string; name: string }[], text = 'response'): Raw.ChatMessage {
 		return {
@@ -965,8 +965,6 @@ export namespace ConfigKey {
 	export const NewWorkspaceCreationAgentEnabled = defineSetting<boolean>('chat.newWorkspaceCreation.enabled', ConfigType.Simple, true);
 	export const NewWorkspaceUseContext7 = defineSetting<boolean>('chat.newWorkspace.useContext7', ConfigType.Simple, false);
 	export const SummarizeAgentConversationHistory = defineSetting<boolean>('chat.summarizeAgentConversationHistory.enabled', ConfigType.Simple, true);
-	export const ConversationTranscriptLookup = defineSetting<boolean>('chat.conversationTranscriptLookup.enabled', ConfigType.ExperimentBased, false);
-	export const BackgroundCompaction = defineSetting<boolean>('chat.backgroundCompaction', ConfigType.ExperimentBased, false);
 	export const VirtualToolThreshold = defineSetting<number>('chat.virtualTools.threshold', ConfigType.ExperimentBased, HARD_TOOL_LIMIT);
 	export const CurrentEditorAgentContext = defineSetting<boolean>('chat.agent.currentEditorContext.enabled', ConfigType.Simple, true);
 	/** BYOK  */