From f3680f6a81e375c4224cc3490fcae10b17552ac0 Mon Sep 17 00:00:00 2001 From: Logan Ramos Date: Fri, 6 Mar 2026 16:38:40 -0500 Subject: [PATCH] Support rendering reserved output separately (#299867) * Support rendering reserved output separately * Fix some of the progress bar logic * Better handling for reserve --- .../api/browser/mainThreadChatAgents2.ts | 1 + .../workbench/api/common/extHost.protocol.ts | 1 + .../api/common/extHostChatAgents2.ts | 1 + .../viewPane/chatContextUsageDetails.ts | 42 +++++++++--- .../viewPane/chatContextUsageWidget.ts | 34 ++++++---- .../media/chatContextUsageDetails.css | 64 +++++++++++++++++++ .../chat/common/chatService/chatService.ts | 1 + ...ode.proposed.chatParticipantAdditions.d.ts | 6 ++ 8 files changed, 130 insertions(+), 20 deletions(-) diff --git a/src/vs/workbench/api/browser/mainThreadChatAgents2.ts b/src/vs/workbench/api/browser/mainThreadChatAgents2.ts index 9459b611a98..b851f3fe206 100644 --- a/src/vs/workbench/api/browser/mainThreadChatAgents2.ts +++ b/src/vs/workbench/api/browser/mainThreadChatAgents2.ts @@ -411,6 +411,7 @@ export class MainThreadChatAgents2 extends Disposable implements MainThreadChatA kind: 'usage', promptTokens: progress.promptTokens, completionTokens: progress.completionTokens, + outputBuffer: progress.outputBuffer, promptTokenDetails: progress.promptTokenDetails }); } diff --git a/src/vs/workbench/api/common/extHost.protocol.ts b/src/vs/workbench/api/common/extHost.protocol.ts index be954021047..1f3304a5d75 100644 --- a/src/vs/workbench/api/common/extHost.protocol.ts +++ b/src/vs/workbench/api/common/extHost.protocol.ts @@ -2528,6 +2528,7 @@ export interface IChatUsageDto { kind: 'usage'; promptTokens: number; completionTokens: number; + outputBuffer?: number; promptTokenDetails?: readonly { category: string; label: string; percentageOfPrompt: number }[]; } diff --git a/src/vs/workbench/api/common/extHostChatAgents2.ts b/src/vs/workbench/api/common/extHostChatAgents2.ts index 
5edee47784f..d91ae3ce3f0 100644 --- a/src/vs/workbench/api/common/extHostChatAgents2.ts +++ b/src/vs/workbench/api/common/extHostChatAgents2.ts @@ -440,6 +440,7 @@ export class ChatAgentResponseStream { kind: 'usage', promptTokens: usage.promptTokens, completionTokens: usage.completionTokens, + outputBuffer: usage.outputBuffer, promptTokenDetails: usage.promptTokenDetails }; _report(dto); diff --git a/src/vs/workbench/contrib/chat/browser/widgetHosts/viewPane/chatContextUsageDetails.ts b/src/vs/workbench/contrib/chat/browser/widgetHosts/viewPane/chatContextUsageDetails.ts index 9c63561bb60..47fe2a59f2c 100644 --- a/src/vs/workbench/contrib/chat/browser/widgetHosts/viewPane/chatContextUsageDetails.ts +++ b/src/vs/workbench/contrib/chat/browser/widgetHosts/viewPane/chatContextUsageDetails.ts @@ -22,8 +22,10 @@ export interface IChatContextUsagePromptTokenDetail { export interface IChatContextUsageData { usedTokens: number; + completionTokens: number; totalContextWindow: number; percentage: number; + outputBufferPercentage?: number; promptTokenDetails?: readonly IChatContextUsagePromptTokenDetail[]; } @@ -39,6 +41,8 @@ export class ChatContextUsageDetails extends Disposable { private readonly percentageLabel: HTMLElement; private readonly tokenCountLabel: HTMLElement; private readonly progressFill: HTMLElement; + private readonly outputBufferFill: HTMLElement; + private readonly outputBufferLegend: HTMLElement; private readonly tokenDetailsContainer: HTMLElement; private readonly warningMessage: HTMLElement; private readonly actionsSection: HTMLElement; @@ -67,6 +71,14 @@ export class ChatContextUsageDetails extends Disposable { // Progress bar const progressBar = this.quotaItem.appendChild($('.quota-bar')); this.progressFill = progressBar.appendChild($('.quota-bit')); + this.outputBufferFill = progressBar.appendChild($('.quota-bit.output-buffer')); + + // Output buffer legend (shown only when outputBuffer is provided) + this.outputBufferLegend = 
this.quotaItem.appendChild($('.output-buffer-legend')); + this.outputBufferLegend.appendChild($('.output-buffer-swatch')); + const legendLabel = this.outputBufferLegend.appendChild($('span')); + legendLabel.textContent = localize('outputReserved', "Reserved for response"); + this.outputBufferLegend.style.display = 'none'; // Token details container (for category breakdown) this.tokenDetailsContainer = this.domNode.appendChild($('.token-details-container')); @@ -98,25 +110,39 @@ export class ChatContextUsageDetails extends Disposable { } update(data: IChatContextUsageData): void { - const { percentage, usedTokens, totalContextWindow, promptTokenDetails } = data; + const { percentage, usedTokens, totalContextWindow, outputBufferPercentage, promptTokenDetails } = data; - // Update token count and percentage + // Update token count and percentage — reflects actual usage only this.tokenCountLabel.textContent = localize( 'tokenCount', "{0} / {1} tokens", this.formatTokenCount(usedTokens, 1), this.formatTokenCount(totalContextWindow, 0) ); - this.percentageLabel.textContent = localize('quotaDisplay', "{0}%", percentage.toFixed(0)); + this.percentageLabel.textContent = localize('quotaDisplay', "{0}%", Math.min(100, percentage).toFixed(0)); - // Update progress bar - this.progressFill.style.width = `${Math.min(100, percentage)}%`; + // Progress bar: actual usage fill + remaining reserved output fill + const usageBarWidth = Math.max(0, Math.min(100, percentage)); + this.progressFill.style.width = `${usageBarWidth}%`; - // Update color classes based on usage level on the quota item + if (outputBufferPercentage !== undefined && outputBufferPercentage > 0) { + // Clamp so the reserve never overflows the bar + this.outputBufferFill.style.width = `${Math.max(0, Math.min(100 - usageBarWidth, outputBufferPercentage))}%`; + this.outputBufferFill.style.display = ''; + this.outputBufferLegend.style.display = ''; + } else { + this.outputBufferFill.style.width = '0'; + 
this.outputBufferFill.style.display = 'none'; + this.outputBufferLegend.style.display = 'none'; + } + + // Color classes based on total spoken-for percentage + // (actual usage + remaining reserve) + const effectivePercentage = percentage + (outputBufferPercentage ?? 0); this.quotaItem.classList.remove('warning', 'error'); - if (percentage >= 90) { + if (effectivePercentage >= 90) { this.quotaItem.classList.add('error'); - } else if (percentage >= 75) { + } else if (effectivePercentage >= 75) { this.quotaItem.classList.add('warning'); } diff --git a/src/vs/workbench/contrib/chat/browser/widgetHosts/viewPane/chatContextUsageWidget.ts b/src/vs/workbench/contrib/chat/browser/widgetHosts/viewPane/chatContextUsageWidget.ts index e051fbcfb3f..0bd56a8fe13 100644 --- a/src/vs/workbench/contrib/chat/browser/widgetHosts/viewPane/chatContextUsageWidget.ts +++ b/src/vs/workbench/contrib/chat/browser/widgetHosts/viewPane/chatContextUsageWidget.ts @@ -274,32 +274,42 @@ export class ChatContextUsageWidget extends Disposable { } const promptTokens = usage.promptTokens; + const completionTokens = usage.completionTokens; const promptTokenDetails = usage.promptTokenDetails; + const outputBuffer = usage.outputBuffer; const totalContextWindow = maxInputTokens + maxOutputTokens; - const usedTokens = promptTokens + maxOutputTokens; - const percentage = Math.min(100, (usedTokens / totalContextWindow) * 100); + const usedTokens = promptTokens + completionTokens; + const percentage = (usedTokens / totalContextWindow) * 100; - this.render(percentage, usedTokens, totalContextWindow, promptTokenDetails); + // Remaining reserve = whatever the model reserved minus what completions + // have already consumed. Once completions exceed the reserve, it drops to 0. + const outputBufferPercentage = outputBuffer !== undefined + ? 
(Math.max(0, outputBuffer - completionTokens) / totalContextWindow) * 100 + : undefined; + + this.render(percentage, completionTokens, usedTokens, totalContextWindow, outputBufferPercentage, promptTokenDetails); this.show(); } - private render(percentage: number, usedTokens: number, totalContextWindow: number, promptTokenDetails?: readonly { category: string; label: string; percentageOfPrompt: number }[]): void { + private render(percentage: number, completionTokens: number, usedTokens: number, totalContextWindow: number, outputBufferPercentage: number | undefined, promptTokenDetails?: readonly { category: string; label: string; percentageOfPrompt: number }[]): void { // Store current data for use in details popup - this.currentData = { usedTokens, totalContextWindow, percentage, promptTokenDetails }; + this.currentData = { usedTokens, completionTokens, totalContextWindow, percentage, outputBufferPercentage, promptTokenDetails }; - // Update pie chart progress - this.progressIndicator.setProgress(percentage); + // Pie chart shows actual usage + remaining reserve so the user can see + // how much of the context window is spoken for. + this.progressIndicator.setProgress(percentage + (outputBufferPercentage ?? 0)); - // Update percentage label and aria-label - const roundedPercentage = Math.round(percentage); + // Update percentage label and aria-label (clamp display to 100) + const roundedPercentage = Math.min(100, Math.round(percentage)); this.percentageLabel.textContent = `${roundedPercentage}%`; this.domNode.setAttribute('aria-label', localize('contextUsagePercentageLabel', "Context window usage: {0}%", roundedPercentage)); - // Update color based on usage level + // Color based on total spoken-for percentage (usage + remaining reserve) + const effectivePercentage = percentage + (outputBufferPercentage ?? 
0); this.domNode.classList.remove('warning', 'error'); - if (percentage >= 90) { + if (effectivePercentage >= 90) { this.domNode.classList.add('error'); - } else if (percentage >= 75) { + } else if (effectivePercentage >= 75) { this.domNode.classList.add('warning'); } } diff --git a/src/vs/workbench/contrib/chat/browser/widgetHosts/viewPane/media/chatContextUsageDetails.css b/src/vs/workbench/contrib/chat/browser/widgetHosts/viewPane/media/chatContextUsageDetails.css index 21dd9bcb7d4..53344c162f3 100644 --- a/src/vs/workbench/contrib/chat/browser/widgetHosts/viewPane/media/chatContextUsageDetails.css +++ b/src/vs/workbench/contrib/chat/browser/widgetHosts/viewPane/media/chatContextUsageDetails.css @@ -52,6 +52,7 @@ border-radius: 4px; border: 1px solid var(--vscode-gauge-border); margin: 4px 0; + display: flex; } .chat-context-usage-details .quota-indicator .quota-bar .quota-bit { @@ -61,6 +62,45 @@ transition: width 0.3s ease; } +.chat-context-usage-details .quota-indicator .quota-bar .quota-bit.output-buffer { + background: repeating-linear-gradient( + -45deg, + var(--vscode-gauge-foreground), + var(--vscode-gauge-foreground) 2px, + transparent 2px, + transparent 4px + ); + border-radius: 0 4px 4px 0; +} + +.chat-context-usage-details .quota-indicator .quota-bar .quota-bit:not(.output-buffer):has(+ .quota-bit.output-buffer:not([style*="display: none"])) { + border-radius: 4px 0 0 4px; +} + +/* Output buffer legend */ +.chat-context-usage-details .quota-indicator .output-buffer-legend { + display: flex; + align-items: center; + gap: 6px; + margin-top: 4px; + font-size: 11px; + color: var(--vscode-descriptionForeground); +} + +.chat-context-usage-details .quota-indicator .output-buffer-legend .output-buffer-swatch { + width: 12px; + height: 8px; + border-radius: 2px; + background: repeating-linear-gradient( + -45deg, + var(--vscode-gauge-foreground), + var(--vscode-gauge-foreground) 2px, + transparent 2px, + transparent 4px + ); + flex-shrink: 0; +} + 
.chat-context-usage-details .quota-indicator.warning .quota-bar { background-color: var(--vscode-gauge-warningBackground); } @@ -69,6 +109,16 @@ background-color: var(--vscode-gauge-warningForeground); } +.chat-context-usage-details .quota-indicator.warning .quota-bar .quota-bit.output-buffer { + background: repeating-linear-gradient( + -45deg, + var(--vscode-gauge-warningForeground), + var(--vscode-gauge-warningForeground) 2px, + transparent 2px, + transparent 4px + ); +} + .chat-context-usage-details .quota-indicator.error .quota-bar { background-color: var(--vscode-gauge-errorBackground); } @@ -77,6 +127,16 @@ background-color: var(--vscode-gauge-errorForeground); } +.chat-context-usage-details .quota-indicator.error .quota-bar .quota-bit.output-buffer { + background: repeating-linear-gradient( + -45deg, + var(--vscode-gauge-errorForeground), + var(--vscode-gauge-errorForeground) 2px, + transparent 2px, + transparent 4px + ); +} + /* Description / warning text — matching ChatStatusDashboard */ .chat-context-usage-details div.description { font-size: 11px; @@ -100,6 +160,10 @@ font-weight: 600; } +.chat-context-usage-details .token-category:first-child .token-category-header { + margin-top: 8px; +} + .chat-context-usage-details .token-detail-item { display: flex; justify-content: space-between; diff --git a/src/vs/workbench/contrib/chat/common/chatService/chatService.ts b/src/vs/workbench/contrib/chat/common/chatService/chatService.ts index 4d1fd79be9f..b33bde89eb8 100644 --- a/src/vs/workbench/contrib/chat/common/chatService/chatService.ts +++ b/src/vs/workbench/contrib/chat/common/chatService/chatService.ts @@ -151,6 +151,7 @@ export interface IChatUsagePromptTokenDetail { export interface IChatUsage { promptTokens: number; completionTokens: number; + outputBuffer?: number; promptTokenDetails?: readonly IChatUsagePromptTokenDetail[]; kind: 'usage'; } diff --git a/src/vscode-dts/vscode.proposed.chatParticipantAdditions.d.ts 
b/src/vscode-dts/vscode.proposed.chatParticipantAdditions.d.ts index 9143c72c08d..286b87dd85c 100644 --- a/src/vscode-dts/vscode.proposed.chatParticipantAdditions.d.ts +++ b/src/vscode-dts/vscode.proposed.chatParticipantAdditions.d.ts @@ -839,6 +839,12 @@ declare module 'vscode' { */ readonly completionTokens: number; + /** + * The number of tokens reserved for the response. + * The UI draws the part of this reserve not yet consumed by completion tokens as a separate segment, marking it as reserved rather than used; when omitted, no reserved segment is shown. + */ + readonly outputBuffer?: number; + /** * Optional breakdown of prompt token usage by category and label. * If the percentages do not sum to 100%, the remaining will be shown as "Uncategorized".