diff --git a/extensions/copilot/src/extension/intents/node/toolCallingLoop.ts b/extensions/copilot/src/extension/intents/node/toolCallingLoop.ts index 57e1b1ae66c..96aed432d98 100644 --- a/extensions/copilot/src/extension/intents/node/toolCallingLoop.ts +++ b/extensions/copilot/src/extension/intents/node/toolCallingLoop.ts @@ -176,6 +176,7 @@ export abstract class ToolCallingLoop('IBuildPromptContext'); diff --git a/extensions/copilot/src/extension/prompt/node/chatMLFetcher.ts b/extensions/copilot/src/extension/prompt/node/chatMLFetcher.ts index 8207853f597..fe7017eae86 100644 --- a/extensions/copilot/src/extension/prompt/node/chatMLFetcher.ts +++ b/extensions/copilot/src/extension/prompt/node/chatMLFetcher.ts @@ -217,6 +217,7 @@ export class ChatMLFetcherImpl extends AbstractChatMLFetcher { const countTokens = () => tokenCountPromise ??= chatEndpoint.acquireTokenizer().countMessagesTokens(messages); const copilotToken = await this._authenticationService.getCopilotToken(); usernameToScrub = copilotToken.username; + const fetchResult = await this._fetchAndStreamChat( chatEndpoint, requestBody, diff --git a/extensions/copilot/src/extension/prompt/node/executionSubagentToolCallingLoop.ts b/extensions/copilot/src/extension/prompt/node/executionSubagentToolCallingLoop.ts index 5789ccac0bc..e87c2282218 100644 --- a/extensions/copilot/src/extension/prompt/node/executionSubagentToolCallingLoop.ts +++ b/extensions/copilot/src/extension/prompt/node/executionSubagentToolCallingLoop.ts @@ -37,6 +37,8 @@ export interface IExecutionSubagentToolCallingLoopOptions extends IToolCallingLo subAgentInvocationId?: string; /** The tool_call_id from the parent agent's LLM response that triggered this subagent invocation. */ parentToolCallId?: string; + /** The headerRequestId from the parent agent's fetch response that triggered this subagent invocation. */ + parentHeaderRequestId?: string; } export class ExecutionSubagentToolCallingLoop extends ToolCallingLoop { @@ -161,6 +163,7 @@ export class ExecutionSubagentToolCallingLoop extends ToolCallingLoop { promptText: options.input.query, subAgentInvocationId: subAgentInvocationId, parentToolCallId: options.chatStreamToolCallId, + parentHeaderRequestId: this._inputContext?.parentHeaderRequestId, }); const stream = this._inputContext?.stream && ChatResponseStreamImpl.filter( diff --git a/extensions/copilot/src/extension/tools/node/searchSubagentTool.ts b/extensions/copilot/src/extension/tools/node/searchSubagentTool.ts index aa12bfe39c8..24807c977dd 100644 --- a/extensions/copilot/src/extension/tools/node/searchSubagentTool.ts +++ b/extensions/copilot/src/extension/tools/node/searchSubagentTool.ts @@ -124,6 +124,7 @@ class SearchSubagentTool implements ICopilotTool { promptText: options.input.query, subAgentInvocationId: subAgentInvocationId, parentToolCallId: options.chatStreamToolCallId, + parentHeaderRequestId: this._inputContext?.parentHeaderRequestId, thoroughness: thoroughnessEnabled ? options.input.thoroughness : undefined, }); diff --git a/extensions/copilot/src/platform/endpoint/node/messagesApi.ts b/extensions/copilot/src/platform/endpoint/node/messagesApi.ts index 5c8b0be0542..9164caac649 100644 --- a/extensions/copilot/src/platform/endpoint/node/messagesApi.ts +++ b/extensions/copilot/src/platform/endpoint/node/messagesApi.ts @@ -654,7 +654,13 @@ export async function processResponseFromMessagesEndpoint( telemetryDataWithUsage = telemetryData.extendedBy({}, { promptTokens: completion.usage.prompt_tokens, completionTokens: completion.usage.completion_tokens, - totalTokens: completion.usage.total_tokens + totalTokens: completion.usage.total_tokens, + ...(completion.usage.prompt_tokens_details && { cachedTokens: completion.usage.prompt_tokens_details.cached_tokens }), + ...(completion.usage.completion_tokens_details && { + reasoningTokens: completion.usage.completion_tokens_details.reasoning_tokens, + acceptedPredictionTokens: completion.usage.completion_tokens_details.accepted_prediction_tokens, + rejectedPredictionTokens: completion.usage.completion_tokens_details.rejected_prediction_tokens, + }), }); } sendEngineMessagesTelemetry(telemetryService, [telemetryMessage], telemetryDataWithUsage, true, logService); diff --git a/extensions/copilot/src/platform/endpoint/node/responsesApi.ts b/extensions/copilot/src/platform/endpoint/node/responsesApi.ts index a42223b0aa5..ad8fd20459f 100644 --- a/extensions/copilot/src/platform/endpoint/node/responsesApi.ts +++ b/extensions/copilot/src/platform/endpoint/node/responsesApi.ts @@ -832,6 +832,12 @@ export function sendCompletionOutputTelemetry(telemetryService: ITelemetryServic promptTokens: completion.usage.prompt_tokens, completionTokens: completion.usage.completion_tokens, totalTokens: completion.usage.total_tokens, + ...(completion.usage.prompt_tokens_details && { cachedTokens: completion.usage.prompt_tokens_details.cached_tokens }), + ...(completion.usage.completion_tokens_details && { + reasoningTokens: completion.usage.completion_tokens_details.reasoning_tokens, + acceptedPredictionTokens: completion.usage.completion_tokens_details.accepted_prediction_tokens, + rejectedPredictionTokens: completion.usage.completion_tokens_details.rejected_prediction_tokens, + }), }); } sendEngineMessagesTelemetry(telemetryService, [telemetryMessage], telemetryDataWithUsage, true, logService); diff --git a/extensions/copilot/src/platform/networking/common/networking.ts b/extensions/copilot/src/platform/networking/common/networking.ts index 3bdc4d7dd56..e952ccb1b2a 100644 --- a/extensions/copilot/src/platform/networking/common/networking.ts +++ b/extensions/copilot/src/platform/networking/common/networking.ts @@ -231,6 +231,8 @@ export type IChatRequestTelemetryProperties = { parentRequestId?: string; /** For a subagent: The tool_call_id from the parent agent's LLM response that triggered this subagent invocation. */ parentToolCallId?: string; + /** For a subagent: The headerRequestId from the parent agent's fetch response that triggered this subagent invocation. */ + parentHeaderRequestId?: string; }; export interface ICreateEndpointBodyOptions extends IMakeChatRequestOptions { diff --git a/extensions/copilot/src/platform/networking/node/chatStream.ts b/extensions/copilot/src/platform/networking/node/chatStream.ts index 385ba7bf53c..d01e07ee3a0 100644 --- a/extensions/copilot/src/platform/networking/node/chatStream.ts +++ b/extensions/copilot/src/platform/networking/node/chatStream.ts @@ -315,6 +315,7 @@ function sendIndividualMessagesTelemetry(telemetryService: ITelemetryService, me // Convert message to JSON string for chunking const messageJsonString = JSON.stringify(message); + const maxChunkSize = 8000; // Split messageJson into chunks of 8000 characters or less @@ -391,6 +392,7 @@ function sendModelCallTelemetry(telemetryService: ITelemetryService, messageData // Send one telemetry event per chunk for (let chunkIndex = 0; chunkIndex < chunks.length; chunkIndex++) { const parentToolCallId = telemetryData.properties.parentToolCallId; + const parentHeaderRequestId = telemetryData.properties.parentHeaderRequestId; const modelCallData = TelemetryData.createAndMarkAsIssued({ modelCallId, conversationId, // Trajectory identifier linking main and supplementary calls @@ -404,6 +406,7 @@ function sendModelCallTelemetry(telemetryService: ITelemetryService, messageData ...(requestOptionsId && { requestOptionsId }), // Add requestOptionsId for input calls ...(telemetryData.properties.turnIndex && { turnIndex: telemetryData.properties.turnIndex }), // Add turnIndex from original telemetryData ...(parentToolCallId && { parentToolCallId }), // Link subagent calls to parent tool invocation + ...(parentHeaderRequestId && { parentHeaderRequestId }), // Link subagent calls to parent HTTP request }, telemetryData.measurements); // Include measurements from original telemetryData telemetryService.sendInternalMSFTTelemetryEvent(eventName, modelCallData.properties, modelCallData.measurements); @@ -447,6 +450,7 @@ export function sendEngineMessagesTelemetry(telemetryService: ITelemetryService, const telemetryDataWithPrompt = telemetryData.extendedBy({ messagesJson: JSON.stringify(messages), }); + telemetryService.sendEnhancedGHTelemetryEvent('engine.messages', multiplexProperties(telemetryDataWithPrompt.properties), telemetryDataWithPrompt.measurements); // Commenting this out to test a new deduplicated way to collect the same information using sendModelTelemetryEvents() // TO DO remove this line completely if the new way allows for complete reconstruction of entire message arrays with much lower drop rate @@ -537,7 +541,13 @@ export function prepareChatCompletionForReturn( telemetryDataWithUsage = telemetryData.extendedBy({}, { promptTokens: c.usage.prompt_tokens, completionTokens: c.usage.completion_tokens, - totalTokens: c.usage.total_tokens + totalTokens: c.usage.total_tokens, + ...(c.usage.prompt_tokens_details && { cachedTokens: c.usage.prompt_tokens_details.cached_tokens }), + ...(c.usage.completion_tokens_details && { + reasoningTokens: c.usage.completion_tokens_details.reasoning_tokens, + acceptedPredictionTokens: c.usage.completion_tokens_details.accepted_prediction_tokens, + rejectedPredictionTokens: c.usage.completion_tokens_details.rejected_prediction_tokens, + }), }); }