Support rendering reserved output separately (#299867)

* Support rendering reserved output separately

* Fix some of the progress bar logic

* Better handling for reserve
This commit is contained in:
Logan Ramos
2026-03-06 16:38:40 -05:00
committed by GitHub
parent ca433bc500
commit f3680f6a81
8 changed files with 130 additions and 20 deletions

View File

@@ -411,6 +411,7 @@ export class MainThreadChatAgents2 extends Disposable implements MainThreadChatA
kind: 'usage',
promptTokens: progress.promptTokens,
completionTokens: progress.completionTokens,
outputBuffer: progress.outputBuffer,
promptTokenDetails: progress.promptTokenDetails
});
}

View File

@@ -2528,6 +2528,7 @@ export interface IChatUsageDto {
kind: 'usage';
promptTokens: number;
completionTokens: number;
outputBuffer?: number;
promptTokenDetails?: readonly { category: string; label: string; percentageOfPrompt: number }[];
}

View File

@@ -440,6 +440,7 @@ export class ChatAgentResponseStream {
kind: 'usage',
promptTokens: usage.promptTokens,
completionTokens: usage.completionTokens,
outputBuffer: usage.outputBuffer,
promptTokenDetails: usage.promptTokenDetails
};
_report(dto);

View File

@@ -22,8 +22,10 @@ export interface IChatContextUsagePromptTokenDetail {
/**
 * Context-window usage figures consumed by `ChatContextUsageDetails.update`.
 */
export interface IChatContextUsageData {
/**
 * Token count shown as the numerator of the "{0} / {1} tokens" label.
 * NOTE(review): ChatContextUsageWidget appears to compute this as
 * promptTokens + completionTokens — confirm against the caller.
 */
usedTokens: number;
/** Completion token count for the request. */
completionTokens: number;
/** Size of the whole context window; denominator of the token label. */
totalContextWindow: number;
/**
 * Usage as a percentage of the context window. Not guaranteed to be
 * within 0–100: consumers clamp it before display.
 */
percentage: number;
/**
 * Share of the context window still reserved for the response, as a
 * percentage. When undefined or not greater than 0 the reserved segment
 * and its legend are hidden.
 */
outputBufferPercentage?: number;
/** Optional breakdown of the prompt tokens by category and label. */
promptTokenDetails?: readonly IChatContextUsagePromptTokenDetail[];
}
@@ -39,6 +41,8 @@ export class ChatContextUsageDetails extends Disposable {
private readonly percentageLabel: HTMLElement;
private readonly tokenCountLabel: HTMLElement;
private readonly progressFill: HTMLElement;
private readonly outputBufferFill: HTMLElement;
private readonly outputBufferLegend: HTMLElement;
private readonly tokenDetailsContainer: HTMLElement;
private readonly warningMessage: HTMLElement;
private readonly actionsSection: HTMLElement;
@@ -67,6 +71,14 @@ export class ChatContextUsageDetails extends Disposable {
// Progress bar
const progressBar = this.quotaItem.appendChild($('.quota-bar'));
this.progressFill = progressBar.appendChild($('.quota-bit'));
this.outputBufferFill = progressBar.appendChild($('.quota-bit.output-buffer'));
// Output buffer legend (shown only when outputBuffer is provided)
this.outputBufferLegend = this.quotaItem.appendChild($('.output-buffer-legend'));
this.outputBufferLegend.appendChild($('.output-buffer-swatch'));
const legendLabel = this.outputBufferLegend.appendChild($('span'));
legendLabel.textContent = localize('outputReserved', "Reserved for response");
this.outputBufferLegend.style.display = 'none';
// Token details container (for category breakdown)
this.tokenDetailsContainer = this.domNode.appendChild($('.token-details-container'));
@@ -98,25 +110,39 @@ export class ChatContextUsageDetails extends Disposable {
}
update(data: IChatContextUsageData): void {
const { percentage, usedTokens, totalContextWindow, promptTokenDetails } = data;
const { percentage, usedTokens, totalContextWindow, outputBufferPercentage, promptTokenDetails } = data;
// Update token count and percentage
// Update token count and percentage — reflects actual usage only
this.tokenCountLabel.textContent = localize(
'tokenCount',
"{0} / {1} tokens",
this.formatTokenCount(usedTokens, 1),
this.formatTokenCount(totalContextWindow, 0)
);
this.percentageLabel.textContent = localize('quotaDisplay', "{0}%", percentage.toFixed(0));
this.percentageLabel.textContent = localize('quotaDisplay', "{0}%", Math.min(100, percentage).toFixed(0));
// Update progress bar
this.progressFill.style.width = `${Math.min(100, percentage)}%`;
// Progress bar: actual usage fill + remaining reserved output fill
const usageBarWidth = Math.max(0, Math.min(100, percentage));
this.progressFill.style.width = `${usageBarWidth}%`;
// Update color classes based on usage level on the quota item
if (outputBufferPercentage !== undefined && outputBufferPercentage > 0) {
// Clamp so the reserve never overflows the bar
this.outputBufferFill.style.width = `${Math.max(0, Math.min(100 - usageBarWidth, outputBufferPercentage))}%`;
this.outputBufferFill.style.display = '';
this.outputBufferLegend.style.display = '';
} else {
this.outputBufferFill.style.width = '0';
this.outputBufferFill.style.display = 'none';
this.outputBufferLegend.style.display = 'none';
}
// Color classes based on total spoken-for percentage
// (actual usage + remaining reserve)
const effectivePercentage = percentage + (outputBufferPercentage ?? 0);
this.quotaItem.classList.remove('warning', 'error');
if (percentage >= 90) {
if (effectivePercentage >= 90) {
this.quotaItem.classList.add('error');
} else if (percentage >= 75) {
} else if (effectivePercentage >= 75) {
this.quotaItem.classList.add('warning');
}

View File

@@ -274,32 +274,42 @@ export class ChatContextUsageWidget extends Disposable {
}
const promptTokens = usage.promptTokens;
const completionTokens = usage.completionTokens;
const promptTokenDetails = usage.promptTokenDetails;
const outputBuffer = usage.outputBuffer;
const totalContextWindow = maxInputTokens + maxOutputTokens;
const usedTokens = promptTokens + maxOutputTokens;
const percentage = Math.min(100, (usedTokens / totalContextWindow) * 100);
const usedTokens = promptTokens + completionTokens;
const percentage = (usedTokens / totalContextWindow) * 100;
this.render(percentage, usedTokens, totalContextWindow, promptTokenDetails);
// Remaining reserve = whatever the model reserved minus what completions
// have already consumed. Once completions exceed the reserve, it drops to 0.
const outputBufferPercentage = outputBuffer !== undefined
? (Math.max(0, outputBuffer - completionTokens) / totalContextWindow) * 100
: undefined;
this.render(percentage, completionTokens, usedTokens, totalContextWindow, outputBufferPercentage, promptTokenDetails);
this.show();
}
private render(percentage: number, usedTokens: number, totalContextWindow: number, promptTokenDetails?: readonly { category: string; label: string; percentageOfPrompt: number }[]): void {
private render(percentage: number, completionTokens: number, usedTokens: number, totalContextWindow: number, outputBufferPercentage: number | undefined, promptTokenDetails?: readonly { category: string; label: string; percentageOfPrompt: number }[]): void {
// Store current data for use in details popup
this.currentData = { usedTokens, totalContextWindow, percentage, promptTokenDetails };
this.currentData = { usedTokens, completionTokens, totalContextWindow, percentage, outputBufferPercentage, promptTokenDetails };
// Update pie chart progress
this.progressIndicator.setProgress(percentage);
// Pie chart shows actual usage + remaining reserve so the user can see
// how much of the context window is spoken for.
this.progressIndicator.setProgress(percentage + (outputBufferPercentage ?? 0));
// Update percentage label and aria-label
const roundedPercentage = Math.round(percentage);
// Update percentage label and aria-label (clamp display to 100)
const roundedPercentage = Math.min(100, Math.round(percentage));
this.percentageLabel.textContent = `${roundedPercentage}%`;
this.domNode.setAttribute('aria-label', localize('contextUsagePercentageLabel', "Context window usage: {0}%", roundedPercentage));
// Update color based on usage level
// Color based on total spoken-for percentage (usage + remaining reserve)
const effectivePercentage = percentage + (outputBufferPercentage ?? 0);
this.domNode.classList.remove('warning', 'error');
if (percentage >= 90) {
if (effectivePercentage >= 90) {
this.domNode.classList.add('error');
} else if (percentage >= 75) {
} else if (effectivePercentage >= 75) {
this.domNode.classList.add('warning');
}
}

View File

@@ -52,6 +52,7 @@
border-radius: 4px;
border: 1px solid var(--vscode-gauge-border);
margin: 4px 0;
display: flex;
}
.chat-context-usage-details .quota-indicator .quota-bar .quota-bit {
@@ -61,6 +62,45 @@
transition: width 0.3s ease;
}
/*
 * Reserved-output segment of the bar. Drawn as -45deg diagonal stripes
 * (2px gauge foreground, 2px transparent) rather than a solid fill, and
 * rounded on the right-hand corners only.
 */
.chat-context-usage-details .quota-indicator .quota-bar .quota-bit.output-buffer {
background: repeating-linear-gradient(
-45deg,
var(--vscode-gauge-foreground),
var(--vscode-gauge-foreground) 2px,
transparent 2px,
transparent 4px
);
border-radius: 0 4px 4px 0;
}
/*
 * When a visible output-buffer segment directly follows the usage fill,
 * round only the fill's left corners. Visibility is detected by matching
 * the text of the sibling's inline style attribute, so this rule depends
 * on the segment being hidden via an inline "display: none".
 */
.chat-context-usage-details .quota-indicator .quota-bar .quota-bit:not(.output-buffer):has(+ .quota-bit.output-buffer:not([style*="display: none"])) {
border-radius: 4px 0 0 4px;
}
/*
 * Output buffer legend: a flex row that vertically centres its children
 * with a 6px gap, in small description-coloured text.
 */
.chat-context-usage-details .quota-indicator .output-buffer-legend {
display: flex;
align-items: center;
gap: 6px;
margin-top: 4px;
font-size: 11px;
color: var(--vscode-descriptionForeground);
}
/*
 * Legend swatch: a fixed 12x8px sample using the same striped gradient as
 * the default output-buffer bar segment. flex-shrink: 0 keeps it from
 * being squeezed by the label next to it.
 */
.chat-context-usage-details .quota-indicator .output-buffer-legend .output-buffer-swatch {
width: 12px;
height: 8px;
border-radius: 2px;
background: repeating-linear-gradient(
-45deg,
var(--vscode-gauge-foreground),
var(--vscode-gauge-foreground) 2px,
transparent 2px,
transparent 4px
);
flex-shrink: 0;
}
/* Bar track colour while the indicator carries the `warning` class. */
.chat-context-usage-details .quota-indicator.warning .quota-bar {
background-color: var(--vscode-gauge-warningBackground);
}
@@ -69,6 +109,16 @@
background-color: var(--vscode-gauge-warningForeground);
}
/*
 * Warning state: same stripe geometry as the default output-buffer
 * segment, recoloured with the gauge warning foreground.
 */
.chat-context-usage-details .quota-indicator.warning .quota-bar .quota-bit.output-buffer {
background: repeating-linear-gradient(
-45deg,
var(--vscode-gauge-warningForeground),
var(--vscode-gauge-warningForeground) 2px,
transparent 2px,
transparent 4px
);
}
/* Bar track colour while the indicator carries the `error` class. */
.chat-context-usage-details .quota-indicator.error .quota-bar {
background-color: var(--vscode-gauge-errorBackground);
}
@@ -77,6 +127,16 @@
background-color: var(--vscode-gauge-errorForeground);
}
/*
 * Error state: same stripe geometry as the default output-buffer
 * segment, recoloured with the gauge error foreground.
 */
.chat-context-usage-details .quota-indicator.error .quota-bar .quota-bit.output-buffer {
background: repeating-linear-gradient(
-45deg,
var(--vscode-gauge-errorForeground),
var(--vscode-gauge-errorForeground) 2px,
transparent 2px,
transparent 4px
);
}
/* Description / warning text — matching ChatStatusDashboard */
.chat-context-usage-details div.description {
font-size: 11px;
@@ -100,6 +160,10 @@
font-weight: 600;
}
/* Adds 8px of space above the header of the first token category. */
.chat-context-usage-details .token-category:first-child .token-category-header {
margin-top: 8px;
}
.chat-context-usage-details .token-detail-item {
display: flex;
justify-content: space-between;

View File

@@ -151,6 +151,7 @@ export interface IChatUsagePromptTokenDetail {
/**
 * Token usage report for a chat request, tagged with `kind: 'usage'`.
 */
export interface IChatUsage {
/** Number of prompt tokens. */
promptTokens: number;
/** Number of completion tokens. */
completionTokens: number;
/**
 * Number of tokens reserved for the response. Optional; when absent no
 * reserved portion is reported.
 */
outputBuffer?: number;
/** Optional breakdown of the prompt tokens by category and label. */
promptTokenDetails?: readonly IChatUsagePromptTokenDetail[];
/** Literal tag identifying this object as a usage report. */
kind: 'usage';
}

View File

@@ -839,6 +839,12 @@ declare module 'vscode' {
*/
readonly completionTokens: number;
/**
* The number of tokens reserved for the response.
* This is rendered specially in the UI to indicate that these tokens aren't used but are reserved.
*/
readonly outputBuffer?: number;
/**
* Optional breakdown of prompt token usage by category and label.
* If the percentages do not sum to 100%, the remaining will be shown as "Uncategorized".