Support rendering reserved output separately (#299867)

* Support rendering reserved output separately
* Fix some of the progress bar logic
* Better handling for the reserved output buffer
2026-04-02 08:15:56 +01:00 · 2026-03-06 16:38:40 -05:00
parent ca433bc500
commit f3680f6a81
8 changed files with 130 additions and 20 deletions
--- a/src/vs/workbench/api/browser/mainThreadChatAgents2.ts
+++ b/src/vs/workbench/api/browser/mainThreadChatAgents2.ts
@@ -411,6 +411,7 @@ export class MainThreadChatAgents2 extends Disposable implements MainThreadChatA
 						kind: 'usage',
 						promptTokens: progress.promptTokens,
 						completionTokens: progress.completionTokens,
+						outputBuffer: progress.outputBuffer,
 						promptTokenDetails: progress.promptTokenDetails
 					});
 				}
--- a/src/vs/workbench/api/common/extHost.protocol.ts
+++ b/src/vs/workbench/api/common/extHost.protocol.ts
@@ -2528,6 +2528,7 @@ export interface IChatUsageDto {
 	kind: 'usage';
 	promptTokens: number;
 	completionTokens: number;
+	outputBuffer?: number;
 	promptTokenDetails?: readonly { category: string; label: string; percentageOfPrompt: number }[];
 }

--- a/src/vs/workbench/api/common/extHostChatAgents2.ts
+++ b/src/vs/workbench/api/common/extHostChatAgents2.ts
@@ -440,6 +440,7 @@ export class ChatAgentResponseStream {
 						kind: 'usage',
 						promptTokens: usage.promptTokens,
 						completionTokens: usage.completionTokens,
+						outputBuffer: usage.outputBuffer,
 						promptTokenDetails: usage.promptTokenDetails
 					};
 					_report(dto);
--- a/src/vs/workbench/contrib/chat/browser/widgetHosts/viewPane/chatContextUsageDetails.ts
+++ b/src/vs/workbench/contrib/chat/browser/widgetHosts/viewPane/chatContextUsageDetails.ts
@@ -22,8 +22,10 @@ export interface IChatContextUsagePromptTokenDetail {

 export interface IChatContextUsageData {
 	usedTokens: number;
+	completionTokens: number;
 	totalContextWindow: number;
 	percentage: number;
+	outputBufferPercentage?: number;
 	promptTokenDetails?: readonly IChatContextUsagePromptTokenDetail[];
 }

@@ -39,6 +41,8 @@ export class ChatContextUsageDetails extends Disposable {
 	private readonly percentageLabel: HTMLElement;
 	private readonly tokenCountLabel: HTMLElement;
 	private readonly progressFill: HTMLElement;
+	private readonly outputBufferFill: HTMLElement;
+	private readonly outputBufferLegend: HTMLElement;
 	private readonly tokenDetailsContainer: HTMLElement;
 	private readonly warningMessage: HTMLElement;
 	private readonly actionsSection: HTMLElement;
@@ -67,6 +71,14 @@ export class ChatContextUsageDetails extends Disposable {
 		// Progress bar
 		const progressBar = this.quotaItem.appendChild($('.quota-bar'));
 		this.progressFill = progressBar.appendChild($('.quota-bit'));
+		this.outputBufferFill = progressBar.appendChild($('.quota-bit.output-buffer'));
+
+		// Output buffer legend (shown only while part of the reserved output buffer remains unused)
+		this.outputBufferLegend = this.quotaItem.appendChild($('.output-buffer-legend'));
+		this.outputBufferLegend.appendChild($('.output-buffer-swatch'));
+		const legendLabel = this.outputBufferLegend.appendChild($('span'));
+		legendLabel.textContent = localize('outputReserved', "Reserved for response");
+		this.outputBufferLegend.style.display = 'none';

 		// Token details container (for category breakdown)
 		this.tokenDetailsContainer = this.domNode.appendChild($('.token-details-container'));
@@ -98,25 +110,39 @@ export class ChatContextUsageDetails extends Disposable {
 	}

 	update(data: IChatContextUsageData): void {
-		const { percentage, usedTokens, totalContextWindow, promptTokenDetails } = data;
+		const { percentage, usedTokens, totalContextWindow, outputBufferPercentage, promptTokenDetails } = data;

-		// Update token count and percentage
+		// Update token count and percentage — reflects actual usage only
 		this.tokenCountLabel.textContent = localize(
 			'tokenCount',
 			"{0} / {1} tokens",
 			this.formatTokenCount(usedTokens, 1),
 			this.formatTokenCount(totalContextWindow, 0)
 		);
-		this.percentageLabel.textContent = localize('quotaDisplay', "{0}%", percentage.toFixed(0));
+		this.percentageLabel.textContent = localize('quotaDisplay', "{0}%", Math.min(100, percentage).toFixed(0));

-		// Update progress bar
-		this.progressFill.style.width = `${Math.min(100, percentage)}%`;
+		// Progress bar: actual usage fill + remaining reserved output fill
+		const usageBarWidth = Math.max(0, Math.min(100, percentage));
+		this.progressFill.style.width = `${usageBarWidth}%`;

-		// Update color classes based on usage level on the quota item
+		if (outputBufferPercentage !== undefined && outputBufferPercentage > 0) {
+			// Clamp so the reserve never overflows the bar
+			this.outputBufferFill.style.width = `${Math.max(0, Math.min(100 - usageBarWidth, outputBufferPercentage))}%`;
+			this.outputBufferFill.style.display = '';
+			this.outputBufferLegend.style.display = '';
+		} else {
+			this.outputBufferFill.style.width = '0';
+			this.outputBufferFill.style.display = 'none';
+			this.outputBufferLegend.style.display = 'none';
+		}
+
+		// Color classes based on total spoken-for percentage
+		// (actual usage + remaining reserve)
+		const effectivePercentage = percentage + (outputBufferPercentage ?? 0);
 		this.quotaItem.classList.remove('warning', 'error');
-		if (percentage >= 90) {
+		if (effectivePercentage >= 90) {
 			this.quotaItem.classList.add('error');
-		} else if (percentage >= 75) {
+		} else if (effectivePercentage >= 75) {
 			this.quotaItem.classList.add('warning');
 		}

--- a/src/vs/workbench/contrib/chat/browser/widgetHosts/viewPane/chatContextUsageWidget.ts
+++ b/src/vs/workbench/contrib/chat/browser/widgetHosts/viewPane/chatContextUsageWidget.ts
@@ -274,32 +274,42 @@ export class ChatContextUsageWidget extends Disposable {
 		}

 		const promptTokens = usage.promptTokens;
+		const completionTokens = usage.completionTokens;
 		const promptTokenDetails = usage.promptTokenDetails;
+		const outputBuffer = usage.outputBuffer;
 		const totalContextWindow = maxInputTokens + maxOutputTokens;
-		const usedTokens = promptTokens + maxOutputTokens;
-		const percentage = Math.min(100, (usedTokens / totalContextWindow) * 100);
+		const usedTokens = promptTokens + completionTokens;
+		const percentage = (usedTokens / totalContextWindow) * 100;

-		this.render(percentage, usedTokens, totalContextWindow, promptTokenDetails);
+		// Remaining reserve = whatever the model reserved minus what completions
+		// have already consumed. Once completions exceed the reserve, it drops to 0.
+		const outputBufferPercentage = outputBuffer !== undefined
+			? (Math.max(0, outputBuffer - completionTokens) / totalContextWindow) * 100
+			: undefined;
+
+		this.render(percentage, completionTokens, usedTokens, totalContextWindow, outputBufferPercentage, promptTokenDetails);
 		this.show();
 	}

-	private render(percentage: number, usedTokens: number, totalContextWindow: number, promptTokenDetails?: readonly { category: string; label: string; percentageOfPrompt: number }[]): void {
+	private render(percentage: number, completionTokens: number, usedTokens: number, totalContextWindow: number, outputBufferPercentage: number | undefined, promptTokenDetails?: readonly { category: string; label: string; percentageOfPrompt: number }[]): void {
 		// Store current data for use in details popup
-		this.currentData = { usedTokens, totalContextWindow, percentage, promptTokenDetails };
+		this.currentData = { usedTokens, completionTokens, totalContextWindow, percentage, outputBufferPercentage, promptTokenDetails };

-		// Update pie chart progress
-		this.progressIndicator.setProgress(percentage);
+		// Pie chart shows actual usage + remaining reserve so the user can see
+		// how much of the context window is spoken for.
+		this.progressIndicator.setProgress(percentage + (outputBufferPercentage ?? 0));

-		// Update percentage label and aria-label
-		const roundedPercentage = Math.round(percentage);
+		// Update percentage label and aria-label (clamp display to 100)
+		const roundedPercentage = Math.min(100, Math.round(percentage));
 		this.percentageLabel.textContent = `${roundedPercentage}%`;
 		this.domNode.setAttribute('aria-label', localize('contextUsagePercentageLabel', "Context window usage: {0}%", roundedPercentage));

-		// Update color based on usage level
+		// Color based on total spoken-for percentage (usage + remaining reserve)
+		const effectivePercentage = percentage + (outputBufferPercentage ?? 0);
 		this.domNode.classList.remove('warning', 'error');
-		if (percentage >= 90) {
+		if (effectivePercentage >= 90) {
 			this.domNode.classList.add('error');
-		} else if (percentage >= 75) {
+		} else if (effectivePercentage >= 75) {
 			this.domNode.classList.add('warning');
 		}
 	}
--- a/src/vs/workbench/contrib/chat/browser/widgetHosts/viewPane/media/chatContextUsageDetails.css
+++ b/src/vs/workbench/contrib/chat/browser/widgetHosts/viewPane/media/chatContextUsageDetails.css
@@ -52,6 +52,7 @@
 	border-radius: 4px;
 	border: 1px solid var(--vscode-gauge-border);
 	margin: 4px 0;
+	display: flex;
 }

 .chat-context-usage-details .quota-indicator .quota-bar .quota-bit {
@@ -61,6 +62,45 @@
 	transition: width 0.3s ease;
 }

+.chat-context-usage-details .quota-indicator .quota-bar .quota-bit.output-buffer {
+	background: repeating-linear-gradient(
+		-45deg,
+		var(--vscode-gauge-foreground),
+		var(--vscode-gauge-foreground) 2px,
+		transparent 2px,
+		transparent 4px
+	);
+	border-radius: 0 4px 4px 0;
+}
+
+.chat-context-usage-details .quota-indicator .quota-bar .quota-bit:not(.output-buffer):has(+ .quota-bit.output-buffer:not([style*="display: none"])) {
+	border-radius: 4px 0 0 4px;
+}
+
+/* Output buffer legend */
+.chat-context-usage-details .quota-indicator .output-buffer-legend {
+	display: flex;
+	align-items: center;
+	gap: 6px;
+	margin-top: 4px;
+	font-size: 11px;
+	color: var(--vscode-descriptionForeground);
+}
+
+.chat-context-usage-details .quota-indicator .output-buffer-legend .output-buffer-swatch {
+	width: 12px;
+	height: 8px;
+	border-radius: 2px;
+	background: repeating-linear-gradient(
+		-45deg,
+		var(--vscode-gauge-foreground),
+		var(--vscode-gauge-foreground) 2px,
+		transparent 2px,
+		transparent 4px
+	);
+	flex-shrink: 0;
+}
+
 .chat-context-usage-details .quota-indicator.warning .quota-bar {
 	background-color: var(--vscode-gauge-warningBackground);
 }
@@ -69,6 +109,16 @@
 	background-color: var(--vscode-gauge-warningForeground);
 }

+.chat-context-usage-details .quota-indicator.warning .quota-bar .quota-bit.output-buffer {
+	background: repeating-linear-gradient(
+		-45deg,
+		var(--vscode-gauge-warningForeground),
+		var(--vscode-gauge-warningForeground) 2px,
+		transparent 2px,
+		transparent 4px
+	);
+}
+
 .chat-context-usage-details .quota-indicator.error .quota-bar {
 	background-color: var(--vscode-gauge-errorBackground);
 }
@@ -77,6 +127,16 @@
 	background-color: var(--vscode-gauge-errorForeground);
 }

+.chat-context-usage-details .quota-indicator.error .quota-bar .quota-bit.output-buffer {
+	background: repeating-linear-gradient(
+		-45deg,
+		var(--vscode-gauge-errorForeground),
+		var(--vscode-gauge-errorForeground) 2px,
+		transparent 2px,
+		transparent 4px
+	);
+}
+
 /* Description / warning text — matching ChatStatusDashboard */
 .chat-context-usage-details div.description {
 	font-size: 11px;
@@ -100,6 +160,10 @@
 	font-weight: 600;
 }

+.chat-context-usage-details .token-category:first-child .token-category-header {
+	margin-top: 8px;
+}
+
 .chat-context-usage-details .token-detail-item {
 	display: flex;
 	justify-content: space-between;
--- a/src/vs/workbench/contrib/chat/common/chatService/chatService.ts
+++ b/src/vs/workbench/contrib/chat/common/chatService/chatService.ts
@@ -151,6 +151,7 @@ export interface IChatUsagePromptTokenDetail {
 export interface IChatUsage {
 	promptTokens: number;
 	completionTokens: number;
+	outputBuffer?: number;
 	promptTokenDetails?: readonly IChatUsagePromptTokenDetail[];
 	kind: 'usage';
 }
--- a/src/vscode-dts/vscode.proposed.chatParticipantAdditions.d.ts
+++ b/src/vscode-dts/vscode.proposed.chatParticipantAdditions.d.ts
@@ -839,6 +839,12 @@ declare module 'vscode' {
 		 */
 		readonly completionTokens: number;

+		/**
+		 * The number of tokens reserved for the model's response.
+		 * The UI renders this reserve separately from actual usage, since these tokens are set aside but not yet consumed.
+		 */
+		readonly outputBuffer?: number;
+
 		/**
 		 * Optional breakdown of prompt token usage by category and label.
 		 * If the percentages do not sum to 100%, the remaining will be shown as "Uncategorized".