Yemohyle/add to telemetry (#311837)

* add last asst messages logs

* add last asst messages logs ...

* add last asst messages logs ....

* add last asst messages logs .....

* add assistant messages added logs

* ...

* add tokens info

* add parentHeaderrequestId value to subagent telemetry

* remove debug logging

* Apply suggestion from @Copilot

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* suggested change

* extra verification

* revert suggested change

* remove debug logs

* add cashed tokens to Legasy SSE path

---------

Co-authored-by: Yevhen Mohylevskyy <yevhenmohylevskyy@Yevhens-MacBook-Pro-2.local>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
yemohyleyemohyle
2026-04-28 10:59:04 -07:00
committed by GitHub
parent 47b95823d2
commit ffc280ded2
11 changed files with 50 additions and 2 deletions
@@ -176,6 +176,7 @@ export abstract class ToolCallingLoop<TOptions extends IToolCallingLoopOptions =
private agentSpan: ISpanHandle | undefined;
private chatSessionIdForTools: string | undefined;
private toolsAvailableEmitted = false;
private lastHeaderRequestId: string | undefined;
public appendAdditionalHookContext(context: string): void {
if (!context) {
@@ -269,6 +270,7 @@ export abstract class ToolCallingLoop<TOptions extends IToolCallingLoopOptions =
hasStopHookQuery,
modeInstructions: this.options.request.modeInstructions2,
additionalHookContext: this.additionalHookContext,
parentHeaderRequestId: this.lastHeaderRequestId,
};
}
@@ -1383,6 +1385,14 @@ export abstract class ToolCallingLoop<TOptions extends IToolCallingLoopOptions =
});
markChatExt(this.options.conversation.sessionId, ChatExtPerfMark.DidFetch);
// Store the headerRequestId from the fetch response for subagent telemetry linking.
// Use requestId (the client-generated UUID sent as X-Request-Id header), not serverRequestId
// (the server's response header value), because requestId is what appears as headerRequestId
// across all telemetry events.
if (fetchResult.type === ChatFetchResponseType.Success) {
this.lastHeaderRequestId = fetchResult.requestId;
}
const promptTokenDetails = await computePromptTokenDetails({
messages: effectiveBuildPromptResult.messages,
tokenizer,
@@ -120,6 +120,11 @@ export interface IBuildPromptContext {
* Additional context provided by a hook.
*/
readonly additionalHookContext?: string;
/**
* The headerRequestId from the most recent parent fetch response.
* Used by subagent tools to link their telemetry back to the parent's HTTP request.
*/
readonly parentHeaderRequestId?: string;
}
export const IBuildPromptContext = createServiceIdentifier<IBuildPromptContext>('IBuildPromptContext');
@@ -217,6 +217,7 @@ export class ChatMLFetcherImpl extends AbstractChatMLFetcher {
const countTokens = () => tokenCountPromise ??= chatEndpoint.acquireTokenizer().countMessagesTokens(messages);
const copilotToken = await this._authenticationService.getCopilotToken();
usernameToScrub = copilotToken.username;
const fetchResult = await this._fetchAndStreamChat(
chatEndpoint,
requestBody,
@@ -37,6 +37,8 @@ export interface IExecutionSubagentToolCallingLoopOptions extends IToolCallingLo
subAgentInvocationId?: string;
/** The tool_call_id from the parent agent's LLM response that triggered this subagent invocation. */
parentToolCallId?: string;
/** The headerRequestId from the parent agent's fetch response that triggered this subagent invocation. */
parentHeaderRequestId?: string;
}
export class ExecutionSubagentToolCallingLoop extends ToolCallingLoop<IExecutionSubagentToolCallingLoopOptions> {
@@ -161,6 +163,7 @@ export class ExecutionSubagentToolCallingLoop extends ToolCallingLoop<IExecution
subType: 'subagent/execution',
conversationId: this.options.conversation.sessionId,
parentToolCallId: this.options.parentToolCallId,
parentHeaderRequestId: this.options.parentHeaderRequestId,
},
}, token);
}
@@ -37,6 +37,8 @@ export interface ISearchSubagentToolCallingLoopOptions extends IToolCallingLoopO
subAgentInvocationId?: string;
/** The tool_call_id from the parent agent's LLM response that triggered this subagent invocation. */
parentToolCallId?: string;
/** The headerRequestId from the parent agent's fetch response that triggered this subagent invocation. */
parentHeaderRequestId?: string;
/** Thoroughness level for the search, passed through to the prompt when thoroughnessEnabled config is on. */
thoroughness?: 'normal' | 'deep';
}
@@ -164,6 +166,7 @@ export class SearchSubagentToolCallingLoop extends ToolCallingLoop<ISearchSubage
subType: 'subagent/search',
conversationId: this.options.conversation.sessionId,
parentToolCallId: this.options.parentToolCallId,
parentHeaderRequestId: this.options.parentHeaderRequestId,
},
requestKindOptions: { kind: 'subagent' }
}, token);
@@ -68,6 +68,7 @@ class ExecutionSubagentTool implements ICopilotTool<IExecutionSubagentParams> {
promptText: options.input.query,
subAgentInvocationId: subAgentInvocationId,
parentToolCallId: options.chatStreamToolCallId,
parentHeaderRequestId: this._inputContext?.parentHeaderRequestId,
});
const stream = this._inputContext?.stream && ChatResponseStreamImpl.filter(
@@ -124,6 +124,7 @@ class SearchSubagentTool implements ICopilotTool<ISearchSubagentParams> {
promptText: options.input.query,
subAgentInvocationId: subAgentInvocationId,
parentToolCallId: options.chatStreamToolCallId,
parentHeaderRequestId: this._inputContext?.parentHeaderRequestId,
thoroughness: thoroughnessEnabled ? options.input.thoroughness : undefined,
});
@@ -654,7 +654,13 @@ export async function processResponseFromMessagesEndpoint(
telemetryDataWithUsage = telemetryData.extendedBy({}, {
promptTokens: completion.usage.prompt_tokens,
completionTokens: completion.usage.completion_tokens,
totalTokens: completion.usage.total_tokens
totalTokens: completion.usage.total_tokens,
...(completion.usage.prompt_tokens_details && { cachedTokens: completion.usage.prompt_tokens_details.cached_tokens }),
...(completion.usage.completion_tokens_details && {
reasoningTokens: completion.usage.completion_tokens_details.reasoning_tokens,
acceptedPredictionTokens: completion.usage.completion_tokens_details.accepted_prediction_tokens,
rejectedPredictionTokens: completion.usage.completion_tokens_details.rejected_prediction_tokens,
}),
});
}
sendEngineMessagesTelemetry(telemetryService, [telemetryMessage], telemetryDataWithUsage, true, logService);
@@ -832,6 +832,12 @@ export function sendCompletionOutputTelemetry(telemetryService: ITelemetryServic
promptTokens: completion.usage.prompt_tokens,
completionTokens: completion.usage.completion_tokens,
totalTokens: completion.usage.total_tokens,
...(completion.usage.prompt_tokens_details && { cachedTokens: completion.usage.prompt_tokens_details.cached_tokens }),
...(completion.usage.completion_tokens_details && {
reasoningTokens: completion.usage.completion_tokens_details.reasoning_tokens,
acceptedPredictionTokens: completion.usage.completion_tokens_details.accepted_prediction_tokens,
rejectedPredictionTokens: completion.usage.completion_tokens_details.rejected_prediction_tokens,
}),
});
}
sendEngineMessagesTelemetry(telemetryService, [telemetryMessage], telemetryDataWithUsage, true, logService);
@@ -231,6 +231,8 @@ export type IChatRequestTelemetryProperties = {
parentRequestId?: string;
/** For a subagent: The tool_call_id from the parent agent's LLM response that triggered this subagent invocation. */
parentToolCallId?: string;
/** For a subagent: The headerRequestId from the parent agent's fetch response that triggered this subagent invocation. */
parentHeaderRequestId?: string;
};
export interface ICreateEndpointBodyOptions extends IMakeChatRequestOptions {
@@ -315,6 +315,7 @@ function sendIndividualMessagesTelemetry(telemetryService: ITelemetryService, me
// Convert message to JSON string for chunking
const messageJsonString = JSON.stringify(message);
const maxChunkSize = 8000;
// Split messageJson into chunks of 8000 characters or less
@@ -391,6 +392,7 @@ function sendModelCallTelemetry(telemetryService: ITelemetryService, messageData
// Send one telemetry event per chunk
for (let chunkIndex = 0; chunkIndex < chunks.length; chunkIndex++) {
const parentToolCallId = telemetryData.properties.parentToolCallId;
const parentHeaderRequestId = telemetryData.properties.parentHeaderRequestId;
const modelCallData = TelemetryData.createAndMarkAsIssued({
modelCallId,
conversationId, // Trajectory identifier linking main and supplementary calls
@@ -404,6 +406,7 @@ function sendModelCallTelemetry(telemetryService: ITelemetryService, messageData
...(requestOptionsId && { requestOptionsId }), // Add requestOptionsId for input calls
...(telemetryData.properties.turnIndex && { turnIndex: telemetryData.properties.turnIndex }), // Add turnIndex from original telemetryData
...(parentToolCallId && { parentToolCallId }), // Link subagent calls to parent tool invocation
...(parentHeaderRequestId && { parentHeaderRequestId }), // Link subagent calls to parent HTTP request
}, telemetryData.measurements); // Include measurements from original telemetryData
telemetryService.sendInternalMSFTTelemetryEvent(eventName, modelCallData.properties, modelCallData.measurements);
@@ -447,6 +450,7 @@ export function sendEngineMessagesTelemetry(telemetryService: ITelemetryService,
const telemetryDataWithPrompt = telemetryData.extendedBy({
messagesJson: JSON.stringify(messages),
});
telemetryService.sendEnhancedGHTelemetryEvent('engine.messages', multiplexProperties(telemetryDataWithPrompt.properties), telemetryDataWithPrompt.measurements);
// Commenting this out to test a new deduplicated way to collect the same information using sendModelTelemetryEvents()
// TO DO remove this line completely if the new way allows for complete reconstruction of entire message arrays with much lower drop rate
@@ -537,7 +541,13 @@ export function prepareChatCompletionForReturn(
telemetryDataWithUsage = telemetryData.extendedBy({}, {
promptTokens: c.usage.prompt_tokens,
completionTokens: c.usage.completion_tokens,
totalTokens: c.usage.total_tokens
totalTokens: c.usage.total_tokens,
...(c.usage.prompt_tokens_details && { cachedTokens: c.usage.prompt_tokens_details.cached_tokens }),
...(c.usage.completion_tokens_details && {
reasoningTokens: c.usage.completion_tokens_details.reasoning_tokens,
acceptedPredictionTokens: c.usage.completion_tokens_details.accepted_prediction_tokens,
rejectedPredictionTokens: c.usage.completion_tokens_details.rejected_prediction_tokens,
}),
});
}