/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

// @ts-check

/**
 * Local mock server that implements the OpenAI Chat Completions streaming API.
 * Used by the chat perf benchmark to replace the real LLM backend with
 * deterministic, zero-latency responses.
 *
 * Supports scenario-based responses: user messages are searched (newest first)
 * for a `[scenario:<id>]` tag that names a registered scenario. Requests without
 * a known tag get the default text-only response.
 */

const http = require('http');
const path = require('path');
const { EventEmitter } = require('events');

const ROOT = path.join(__dirname, '..', '..', '..');

// -- Scenario fixtures -------------------------------------------------------

/**
 * @typedef {{ content: string, delayMs: number }} StreamChunk
 */

/**
 * A single turn in a multi-turn scenario.
 *
 * @typedef {{
 *   kind: 'tool-calls',
 *   toolCalls: Array<{ toolNamePattern: RegExp, arguments: Record<string, any> }>,
 * } | {
 *   kind: 'content',
 *   chunks: StreamChunk[],
 * } | {
 *   kind: 'thinking',
 *   thinkingChunks: StreamChunk[],
 *   chunks: StreamChunk[],
 * } | {
 *   kind: 'user',
 *   message: string,
 * }} ScenarioTurn
 */

/**
 * A scenario turn produced by the model.
 *
 * @typedef {{
 *   kind: 'tool-calls',
 *   toolCalls: Array<{ toolNamePattern: RegExp, arguments: Record<string, any> }>,
 * } | {
 *   kind: 'content',
 *   chunks: StreamChunk[],
 * } | {
 *   kind: 'thinking',
 *   thinkingChunks: StreamChunk[],
 *   chunks: StreamChunk[],
 * }} ModelScenarioTurn
 */

/**
 * A model turn that emits content chunks.
 *
 * @typedef {{
 *   kind: 'content',
 *   chunks: StreamChunk[],
 * } | {
 *   kind: 'thinking',
 *   thinkingChunks: StreamChunk[],
 *   chunks: StreamChunk[],
 * }} ContentScenarioTurn
 */

/**
 * A multi-turn scenario — an ordered sequence of turns.
 * The mock server determines which model turn to serve based on the number
 * of assistant→tool round-trips already present in the conversation.
 * User turns are skipped by the server and instead injected by the test
 * harness, which types them into the chat input and presses Enter.
 *
 * @typedef {{
 *   type: 'multi-turn',
 *   turns: ScenarioTurn[],
 * }} MultiTurnScenario
 */

/**
 * Type guard: true only for objects whose `type` is `'multi-turn'`.
 * Always returns a real boolean (never the falsy operand itself).
 *
 * @param {any} scenario
 * @returns {scenario is MultiTurnScenario}
 */
function isMultiTurnScenario(scenario) {
	return scenario !== null && typeof scenario === 'object' && scenario.type === 'multi-turn';
}

/**
 * Helper for building scenario chunk sequences with timing control.
 * Every method except `build()` returns `this` so calls can be chained.
 */
class ScenarioBuilder {
	constructor() {
		/** @type {StreamChunk[]} */
		this.chunks = [];
	}

	/**
	 * Emit a content chunk immediately (no delay before it).
	 * @param {string} content
	 * @returns {this}
	 */
	emit(content) {
		this.chunks.push({ content, delayMs: 0 });
		return this;
	}

	/**
	 * Wait, then emit a content chunk — simulates network/token generation latency.
	 * @param {number} ms - delay in milliseconds before this chunk
	 * @param {string} content
	 * @returns {this}
	 */
	wait(ms, content) {
		this.chunks.push({ content, delayMs: ms });
		return this;
	}

	/**
	 * Emit multiple chunks with uniform inter-chunk delay.
	 * @param {string[]} contents
	 * @param {number} [delayMs=15] - delay before each chunk (default ~1 frame)
	 * @returns {this}
	 */
	stream(contents, delayMs = 15) {
		for (const content of contents) {
			this.chunks.push({ content, delayMs });
		}
		return this;
	}

	/**
	 * Emit multiple chunks with no delay (burst).
	 * @param {string[]} contents
	 * @returns {this}
	 */
	burst(contents) {
		return this.stream(contents, 0);
	}

	/**
	 * Return the accumulated chunk list (the builder's own array, not a copy).
	 * @returns {StreamChunk[]}
	 */
	build() {
		return this.chunks;
	}
}

/** @type {Record<string, StreamChunk[] | MultiTurnScenario>} */
const SCENARIOS = /** @type {Record<string, StreamChunk[] | MultiTurnScenario>} */ ({});

const DEFAULT_SCENARIO = 'text-only';

/**
 * Look up a registered scenario by ID.
 * Only own properties count: SCENARIOS is a plain object, so a bare
 * `SCENARIOS[id]` would also match inherited keys such as `constructor`.
 *
 * @param {string} id
 * @returns {StreamChunk[] | MultiTurnScenario | undefined}
 */
function findScenario(id) {
	return Object.prototype.hasOwnProperty.call(SCENARIOS, id) ? SCENARIOS[id] : undefined;
}

/**
 * Chunks of the default scenario, or an empty list when it is not registered.
 *
 * @returns {StreamChunk[]}
 * @throws {Error} when the default scenario is registered as multi-turn
 */
function getDefaultScenarioChunks() {
	const scenario = findScenario(DEFAULT_SCENARIO);
	if (isMultiTurnScenario(scenario)) {
		throw new Error(`Default scenario '${DEFAULT_SCENARIO}' must be content-only`);
	}
	return scenario ?? [];
}

// -- SSE chunk builder -------------------------------------------------------

const MODEL = 'gpt-4o-2024-08-06';

/**
 * Build the `chat.completion.chunk` envelope shared by every streamed event.
 *
 * @param {Record<string, any>} delta - the single choice's delta payload
 * @param {string | null} [finishReason=null]
 */
function makeCompletionChunk(delta, finishReason = null) {
	return {
		id: 'chatcmpl-perf-benchmark',
		object: 'chat.completion.chunk',
		created: Math.floor(Date.now() / 1000),
		model: MODEL,
		choices: [{
			index: 0,
			delta,
			finish_reason: finishReason,
			content_filter_results: {},
		}],
		usage: null,
	};
}

/**
 * Build a content chunk, or the terminating `stop` chunk when `finish` is true.
 *
 * @param {string} content
 * @param {number} index - unused; kept for signature compatibility
 * @param {boolean} finish
 */
function makeChunk(content, index, finish) {
	return finish
		? makeCompletionChunk({}, 'stop')
		: makeCompletionChunk({ content });
}

/**
 * Build the initial chunk of a content response (assistant role, empty content).
 */
function makeInitialChunk() {
	return makeCompletionChunk({ role: 'assistant', content: '' });
}

/**
 * Build a tool-call initial chunk (role only, no content).
 */
function makeToolCallInitialChunk() {
	return makeCompletionChunk({ role: 'assistant', content: null });
}

/**
 * Build a tool-call function-start chunk.
 * @param {number} index - tool call index
 * @param {string} callId - unique call ID
 * @param {string} functionName - tool function name
 */
function makeToolCallStartChunk(index, callId, functionName) {
	return makeCompletionChunk({
		tool_calls: [{
			index,
			id: callId,
			type: 'function',
			function: { name: functionName, arguments: '' },
		}],
	});
}

/**
 * Build a tool-call arguments chunk.
 * @param {number} index - tool call index
 * @param {string} argsFragment - partial JSON arguments
 */
function makeToolCallArgsChunk(index, argsFragment) {
	return makeCompletionChunk({
		tool_calls: [{
			index,
			function: { arguments: argsFragment },
		}],
	});
}

/**
 * Build a tool-call finish chunk.
 */
function makeToolCallFinishChunk() {
	return makeCompletionChunk({}, 'tool_calls');
}

/**
 * Build a thinking (chain-of-thought summary) chunk.
 * Uses the `cot_summary` field in the delta, matching the Copilot API wire format.
 * @param {string} text - thinking text fragment
 */
function makeThinkingChunk(text) {
	return makeCompletionChunk({ cot_summary: text });
}

/**
 * Build a thinking ID chunk (sent after thinking text to close the block).
 * @param {string} cotId - unique chain-of-thought ID
 */
function makeThinkingIdChunk(cotId) {
	return makeCompletionChunk({ cot_id: cotId });
}

// -- Request handler ---------------------------------------------------------

/**
 * Current time of day as `HH:MM:SS.mmm` (UTC), used as a log prefix.
 * @returns {string}
 */
function timestamp() {
	return new Date().toISOString().slice(11, -1);
}

/**
 * Capabilities object advertised for every mock chat model.
 * @param {string} family - model family name
 */
function mockCapabilities(family) {
	return {
		type: 'chat',
		family,
		tokenizer: 'o200k_base',
		limits: {
			// Use a very large token limit so the Responses API compaction
			// threshold (90% of max_prompt_tokens) is never reached during
			// perf benchmarks.
			max_prompt_tokens: 10000000,
			max_output_tokens: 131072,
			max_context_window_tokens: 10000000,
		},
		supports: {
			streaming: true,
			tool_calls: true,
			parallel_tool_calls: true,
			vision: false,
		},
	};
}

/**
 * Route a single HTTP request to the matching mock endpoint.
 * Anything not explicitly handled gets an empty `200 {}` JSON response.
 *
 * @param {http.IncomingMessage} req
 * @param {http.ServerResponse} res
 */
function handleRequest(req, res) {
	const contentLength = req.headers['content-length'] || '0';
	console.log(`[mock-llm] ${timestamp()} ${req.method} ${req.url} (${contentLength} bytes)`);

	// CORS
	res.setHeader('Access-Control-Allow-Origin', '*');
	res.setHeader('Access-Control-Allow-Methods', 'GET, POST, PUT, DELETE, OPTIONS');
	res.setHeader('Access-Control-Allow-Headers', '*');
	if (req.method === 'OPTIONS') {
		res.writeHead(204);
		res.end();
		return;
	}

	const json = (/** @type {number} */ status, /** @type {any} */ data) => {
		res.writeHead(status, { 'Content-Type': 'application/json' });
		res.end(JSON.stringify(data));
	};

	const origin = `http://${req.headers.host}`;

	// `new URL` throws on malformed request targets (e.g. `//`) or Host headers;
	// answer 400 instead of letting the exception take the server down.
	/** @type {string} */
	let pathname;
	try {
		pathname = new URL(req.url || '/', origin).pathname;
	} catch {
		json(400, { error: 'invalid request URL' });
		return;
	}

	// Collect the raw bytes and decode once, so multi-byte UTF-8 sequences that
	// straddle two network chunks are not corrupted.
	const readBody = () => /** @type {Promise<string>} */ (new Promise((resolve, reject) => {
		/** @type {Buffer[]} */
		const parts = [];
		req.on('data', chunk => { parts.push(typeof chunk === 'string' ? Buffer.from(chunk) : chunk); });
		req.on('end', () => resolve(Buffer.concat(parts).toString('utf8')));
		req.on('error', reject);
	}));

	// Read the body, run the handler, and make sure a failure (sync or async)
	// is logged and the response is closed rather than left hanging.
	const withBody = (/** @type {(body: string) => any} */ handler) => {
		readBody().then(handler).catch(err => {
			console.error(`[mock-llm] ${timestamp()} request failed: ${err && err.stack || err}`);
			if (!res.headersSent) {
				res.writeHead(500);
			}
			if (!res.writableEnded) {
				res.end();
			}
		});
	};

	// -- Health -------------------------------------------------------
	if (pathname === '/health') {
		res.writeHead(200);
		res.end('ok');
		return;
	}

	// -- Token endpoints (DomainService.tokenURL / tokenNoAuthURL) ----
	// /copilot_internal/v2/token, /copilot_internal/v2/nltoken
	if (pathname.startsWith('/copilot_internal/')) {
		if (pathname.includes('/token') || pathname.includes('/nltoken')) {
			json(200, {
				token: 'perf-benchmark-fake-token',
				expires_at: Math.floor(Date.now() / 1000) + 3600,
				refresh_in: 1800,
				sku: 'free_limited_copilot',
				individual: true,
				copilot_plan: 'free',
				endpoints: {
					api: origin,
					proxy: origin,
				},
			});
		} else {
			// /copilot_internal/user, /copilot_internal/content_exclusion, etc.
			json(200, {});
		}
		return;
	}

	// -- Telemetry (DomainService.telemetryURL) ----------------------
	if (pathname === '/telemetry') {
		json(200, {});
		return;
	}

	// -- Model Router (DomainService.capiModelRouterURL = /models/session/intent) --
	// The automode service POSTs here to get the best model for a request.
	if (pathname === '/models/session/intent' && req.method === 'POST') {
		withBody(() => json(200, { model: MODEL }));
		return;
	}

	// -- Auto Models / Model Session (DomainService.capiAutoModelURL = /models/session) --
	// Returns AutoModeAPIResponse: { available_models, session_token, expires_at }
	if (pathname === '/models/session' && req.method === 'POST') {
		withBody(() => json(200, {
			available_models: [MODEL, 'gpt-4o-mini'],
			session_token: `perf-session-token-${Date.now()}`,
			expires_at: Math.floor(Date.now() / 1000) + 3600,
			discounted_costs: {},
		}));
		return;
	}

	// -- Models (DomainService.capiModelsURL = /models) --------------
	if (pathname === '/models' && req.method === 'GET') {
		json(200, {
			data: [
				{
					id: MODEL,
					name: 'GPT-4o (Mock)',
					version: '2024-05-13',
					vendor: 'copilot',
					model_picker_enabled: true,
					is_chat_default: true,
					is_chat_fallback: true,
					billing: { is_premium: false, multiplier: 0 },
					capabilities: mockCapabilities('gpt-4o'),
					supported_endpoints: ['/chat/completions'],
				},
				{
					id: 'gpt-4o-mini',
					name: 'GPT-4o mini (Mock)',
					version: '2024-07-18',
					vendor: 'copilot',
					model_picker_enabled: false,
					is_chat_default: false,
					is_chat_fallback: false,
					billing: { is_premium: false, multiplier: 0 },
					capabilities: mockCapabilities('gpt-4o-mini'),
					supported_endpoints: ['/chat/completions'],
				},
			],
		});
		return;
	}

	// -- Model by ID (DomainService.capiModelsURL/{id}) --------------
	if (pathname.startsWith('/models/') && req.method === 'GET') {
		if (pathname.endsWith('/policy')) {
			json(200, { state: 'accepted', terms: '' });
			return;
		}
		const modelId = pathname.split('/models/')[1]?.split('/')[0];
		json(200, {
			id: modelId || MODEL,
			name: 'GPT-4o (Mock)',
			version: '2024-05-13',
			vendor: 'copilot',
			model_picker_enabled: true,
			is_chat_default: true,
			is_chat_fallback: true,
			capabilities: mockCapabilities('gpt-4o'),
		});
		return;
	}

	// -- Agents (DomainService.remoteAgentsURL = /agents) -------------
	if (pathname.startsWith('/agents')) {
		if (pathname.includes('/sessions')) {
			// /agents/sessions — CopilotSessions
			json(200, { sessions: [], total_count: 0, page_size: 20, page_number: 1 });
		} else if (pathname.includes('/swe/models')) {
			// /agents/swe/models — CCAModelsList
			json(200, {
				data: [{
					id: MODEL,
					name: 'GPT-4o (Mock)',
					vendor: 'copilot',
					capabilities: { type: 'chat', family: 'gpt-4o', supports: { streaming: true } },
				}],
			});
		} else if (pathname.includes('/swe/')) {
			// /agents/swe/... — agent jobs, etc.
			json(200, {});
		} else {
			// /agents — list agents
			json(200, { agents: [] });
		}
		return;
	}

	// -- Chat-style endpoints, all served by the chat completions handler:
	//    Chat Completions (DomainService.capiChatURL = /chat/completions)
	//    Responses API    (DomainService.capiResponsesURL = /responses)
	//    Messages API     (DomainService.capiMessagesURL = /v1/messages)
	//    Proxy completions (/v1/engines/*/completions)
	const isChatEndpoint = pathname === '/chat/completions'
		|| pathname === '/responses'
		|| pathname === '/v1/messages'
		|| pathname.includes('/v1/engines/');
	if (isChatEndpoint && req.method === 'POST') {
		withBody(body => handleChatCompletions(body, res));
		return;
	}

	// -- Skills, Search, Embeddings -----------------------------------
	if (pathname === '/skills' || pathname.startsWith('/search/') || pathname.startsWith('/embeddings')) {
		json(200, { data: [] });
		return;
	}

	// -- Catch-all: any remaining POST with messages → chat completions
	if (req.method === 'POST') {
		withBody(body => {
			/** @type {any} */
			let parsed;
			try {
				parsed = JSON.parse(body);
			} catch {
				// Not JSON — treated like any other unknown POST below.
				parsed = undefined;
			}
			if (Array.isArray(parsed?.messages)) {
				return handleChatCompletions(body, res);
			}
			json(200, {});
		});
		return;
	}

	// -- Catch-all GET → empty success --------------------------------
	json(200, {});
}

// -- Server lifecycle --------------------------------------------------------

/** Emits `scenarioCompletion` when a scenario chat completion is fully served. */
const serverEvents = new EventEmitter();

/** @param {number} ms */
const sleep = (ms) => new Promise(resolve => setTimeout(resolve, ms));

/** Matches a `[scenario:<id>]` tag; capture group 1 is the scenario ID. */
const SCENARIO_TAG = /\[scenario:([^\]]+)\]/;

/**
 * Flatten a chat message's content to plain text. String content is returned
 * as-is; array content is the concatenation of each part's `text` (parts
 * without one contribute nothing); anything else yields ''.
 *
 * @param {any} msg
 * @returns {string}
 */
function getMessageText(msg) {
	const content = msg?.content;
	if (typeof content === 'string') {
		return content;
	}
	if (Array.isArray(content)) {
		return content.map((/** @type {any} */ part) => part?.text || '').join('');
	}
	return '';
}

/**
 * Count the number of model turns already completed for the CURRENT scenario.
 * Only counts assistant messages that appear after the last user message
 * containing a [scenario:X] tag. This prevents assistant messages from
 * previous scenarios (in the same chat session) from inflating the count.
 * When no tagged user message exists, every assistant message is counted.
 *
 * @param {any[]} messages
 * @returns {number}
 */
function countCompletedModelTurns(messages) {
	// Index of the last user message carrying a scenario tag (-1 if none).
	let scenarioMsgIdx = -1;
	for (let i = messages.length - 1; i >= 0; i--) {
		const msg = messages[i];
		if (msg?.role === 'user' && SCENARIO_TAG.test(getMessageText(msg))) {
			scenarioMsgIdx = i;
			break;
		}
	}

	let turns = 0;
	for (let i = scenarioMsgIdx + 1; i < messages.length; i++) {
		if (messages[i]?.role === 'assistant') {
			turns++;
		}
	}
	return turns;
}

/**
 * Compute the model-turn index for the current request given the scenario's
 * turn list. User turns are skipped (they're handled by the test harness)
 * and do not consume a model turn index.
 *
 * The algorithm counts completed assistant messages in the conversation
 * history (each one = one served model turn), then maps that to the
 * n-th model turn in the scenario (skipping user turns), clamped to the
 * last model turn. If the scenario has no model turns at all, `turn` is
 * `undefined` and `turnIndex` is -1.
 *
 * @param {ScenarioTurn[]} turns
 * @param {any[]} messages
 * @returns {{ turn: ModelScenarioTurn | undefined, turnIndex: number }}
 */
function resolveCurrentTurn(turns, messages) {
	const completedModelTurns = countCompletedModelTurns(messages);
	// Build the model-only turn list (skip user turns)
	const modelTurns = /** @type {ModelScenarioTurn[]} */ (turns.filter(t => t.kind !== 'user'));
	const idx = Math.min(completedModelTurns, modelTurns.length - 1);
	return { turn: modelTurns[idx], turnIndex: idx };
}

/**
 * Serve one chat completion as an SSE stream.
 *
 * The request body is parsed best-effort: a body that cannot be parsed is
 * logged and served with the default scenario.
 *
 * @param {string} body - raw JSON request body
 * @param {http.ServerResponse} res
 */
async function handleChatCompletions(body, res) {
	let scenarioId = DEFAULT_SCENARIO;
	let isScenarioRequest = false;
	/** @type {string[]} */
	let requestToolNames = [];
	/** @type {any[]} */
	let messages = [];

	try {
		const parsed = JSON.parse(body);
		messages = Array.isArray(parsed?.messages) ? parsed.messages : [];

		// Log user messages for debugging
		const userMsgs = messages.filter((/** @type {any} */ m) => m?.role === 'user');
		if (userMsgs.length > 0) {
			const lastUserContent = userMsgs[userMsgs.length - 1].content;
			const lastContent = typeof lastUserContent === 'string'
				? lastUserContent.substring(0, 100)
				: '(structured)';
			console.log(`[mock-llm] ${timestamp()} → ${messages.length} msgs, last user: "${lastContent}"`);
		}

		// Extract available tool names from the request's tools array
		const tools = Array.isArray(parsed?.tools) ? parsed.tools : [];
		requestToolNames = tools.map((/** @type {any} */ t) => t?.function?.name).filter(Boolean);
		if (requestToolNames.length > 0) {
			console.log(`[mock-llm] ${timestamp()} → ${requestToolNames.length} tools available: ${requestToolNames.join(', ')}`);
		}

		// Search user messages in reverse order (newest first) for the scenario
		// tag. This ensures the most recent message's tag takes precedence when
		// multiple messages with different tags exist in the same conversation
		// (e.g. in the leak checker which sends many scenarios in one session).
		// Follow-up user messages in multi-turn scenarios won't have a tag, so
		// searching backwards still finds the correct tag from the initial message.
		for (let mi = messages.length - 1; mi >= 0; mi--) {
			const msg = messages[mi];
			if (msg?.role !== 'user') {
				continue;
			}
			const match = getMessageText(msg).match(SCENARIO_TAG);
			if (match && findScenario(match[1])) {
				scenarioId = match[1];
				isScenarioRequest = true;
				break;
			}
		}
	} catch (err) {
		// Best-effort: fall through and serve the default scenario.
		console.warn(`[mock-llm] ${timestamp()} could not parse chat request body: ${err}`);
	}

	const scenario = findScenario(scenarioId) ?? findScenario(DEFAULT_SCENARIO);

	res.writeHead(200, {
		'Content-Type': 'text/event-stream',
		'Cache-Control': 'no-cache',
		'Connection': 'keep-alive',
		'X-Request-Id': `perf-benchmark-${Date.now()}`,
	});

	// Handle multi-turn scenarios — only when the request actually has tools.
	// Ancillary requests (title generation, progress messages) also contain the
	// [scenario:...] tag but don't send tools, so they fall through to content.
	if (isMultiTurnScenario(scenario) && requestToolNames.length > 0) {
		const { turn, turnIndex } = resolveCurrentTurn(scenario.turns, messages);
		if (turn) {
			const modelTurnCount = scenario.turns.filter(t => t.kind !== 'user').length;
			console.log(`[mock-llm] ${timestamp()} → multi-turn scenario ${scenarioId}, model turn ${turnIndex + 1}/${modelTurnCount} (${turn.kind}), ${countCompletedModelTurns(messages)} completed turns in history`);

			if (turn.kind === 'tool-calls') {
				await streamToolCalls(res, turn.toolCalls, requestToolNames, scenarioId);
				return;
			}
			if (turn.kind === 'thinking') {
				await streamThinkingThenContent(res, turn.thinkingChunks, turn.chunks, isScenarioRequest);
				return;
			}
			// kind === 'content' — stream the final text response
			await streamContent(res, turn.chunks, isScenarioRequest);
			return;
		}
		// A scenario made only of user turns has nothing to serve here.
		console.warn(`[mock-llm] ${timestamp()} multi-turn scenario ${scenarioId} has no model turns, using fallback content`);
	}

	// Standard content-only scenario (or multi-turn scenario falling back for
	// ancillary requests like title generation that don't include tools).
	// If not even the default scenario is registered, stream an empty response.
	const chunks = isMultiTurnScenario(scenario)
		? getFirstContentTurn(scenario)
		: /** @type {StreamChunk[]} */ (scenario ?? []);
	await streamContent(res, chunks, isScenarioRequest);
}

/**
 * Get the chunks from the first content (or thinking) turn of a multi-turn
 * scenario, used as fallback text for ancillary requests (title generation
 * etc). Falls back to the default scenario's chunks when there is none.
 *
 * @param {MultiTurnScenario} scenario
 * @returns {StreamChunk[]}
 */
function getFirstContentTurn(scenario) {
	const contentTurn = /** @type {ContentScenarioTurn | undefined} */ (
		scenario.turns.find(turn => turn.kind === 'content' || turn.kind === 'thinking')
	);
	return contentTurn?.chunks ?? getDefaultScenarioChunks();
}

/**
 * Write one SSE `data:` event carrying a JSON payload.
 * @param {http.ServerResponse} res
 * @param {any} payload
 */
function writeEvent(res, payload) {
	res.write(`data: ${JSON.stringify(payload)}\n\n`);
}

/**
 * Write a chunk list as SSE events, sleeping `delayMs` before each chunk
 * that asks for a delay.
 *
 * @param {http.ServerResponse} res
 * @param {StreamChunk[]} chunks
 * @param {(text: string) => any} toPayload - wraps chunk text in a wire chunk
 */
async function writeChunks(res, chunks, toPayload) {
	for (const chunk of chunks) {
		if (chunk.delayMs > 0) {
			await sleep(chunk.delayMs);
		}
		writeEvent(res, toPayload(chunk.content));
	}
}

/**
 * Terminate a content stream: `stop` chunk, `[DONE]` sentinel, end the
 * response, then signal `scenarioCompletion` for scenario requests.
 *
 * @param {http.ServerResponse} res
 * @param {boolean} isScenarioRequest
 */
function endContentStream(res, isScenarioRequest) {
	writeEvent(res, makeChunk('', 0, true));
	res.write('data: [DONE]\n\n');
	res.end();
	if (isScenarioRequest) {
		serverEvents.emit('scenarioCompletion');
	}
}

/**
 * Stream content chunks as a standard SSE response.
 * @param {http.ServerResponse} res
 * @param {StreamChunk[]} chunks
 * @param {boolean} isScenarioRequest
 */
async function streamContent(res, chunks, isScenarioRequest) {
	writeEvent(res, makeInitialChunk());
	await writeChunks(res, chunks, text => makeChunk(text, 0, false));
	endContentStream(res, isScenarioRequest);
}

/**
 * Stream thinking chunks followed by content chunks as an SSE response.
 * Thinking is emitted as `cot_summary` deltas, then a `cot_id` to close the
 * thinking block, followed by standard content deltas.
 * @param {http.ServerResponse} res
 * @param {StreamChunk[]} thinkingChunks
 * @param {StreamChunk[]} contentChunks
 * @param {boolean} isScenarioRequest
 */
async function streamThinkingThenContent(res, thinkingChunks, contentChunks, isScenarioRequest) {
	writeEvent(res, makeInitialChunk());

	// Stream thinking text
	await writeChunks(res, thinkingChunks, makeThinkingChunk);

	// Close thinking block with ID
	writeEvent(res, makeThinkingIdChunk(`cot_perf_${Date.now()}`));
	await sleep(10);

	// Stream content
	await writeChunks(res, contentChunks, text => makeChunk(text, 0, false));
	endContentStream(res, isScenarioRequest);
}

/**
 * Stream tool call chunks as an SSE response.
 * Does not emit `scenarioCompletion`.
 *
 * @param {http.ServerResponse} res
 * @param {Array<{ toolNamePattern: RegExp, arguments: Record<string, any> }>} toolCalls
 * @param {string[]} requestToolNames
 * @param {string} scenarioId
 */
async function streamToolCalls(res, toolCalls, requestToolNames, scenarioId) {
	writeEvent(res, makeToolCallInitialChunk());

	for (let i = 0; i < toolCalls.length; i++) {
		const call = toolCalls[i];
		const callId = `call_perf_${scenarioId}_${i}_${Date.now()}`;

		// Find the matching tool name from the request's tools array.
		// `test()` on a /g or /y pattern resumes from `lastIndex`, so reset it
		// first; otherwise a scenario reused across requests can miss a match.
		let toolName = requestToolNames.find(name => {
			call.toolNamePattern.lastIndex = 0;
			return call.toolNamePattern.test(name);
		});
		if (!toolName) {
			toolName = call.toolNamePattern.source.replace(/[\\.|?*+^${}()\[\]]/g, '');
			console.warn(`[mock-llm] No matching tool for pattern ${call.toolNamePattern}, using fallback: ${toolName}`);
		}

		// Stream tool call: start chunk, then arguments in fragments
		writeEvent(res, makeToolCallStartChunk(i, callId, toolName));
		await sleep(10);

		const argsJson = JSON.stringify(call.arguments ?? {});
		const fragmentSize = Math.max(20, Math.ceil(argsJson.length / 4));
		for (let pos = 0; pos < argsJson.length; pos += fragmentSize) {
			writeEvent(res, makeToolCallArgsChunk(i, argsJson.slice(pos, pos + fragmentSize)));
			await sleep(5);
		}
	}

	writeEvent(res, makeToolCallFinishChunk());
	res.write('data: [DONE]\n\n');
	res.end();
}

/**
 * A monotonically increasing counter that callers can wait on.
 * None of the returned functions depend on `this`, so they can be handed out
 * individually.
 *
 * @param {string} noun - plural label used in the timeout message
 */
function createWaitableCounter(noun) {
	let count = 0;
	/** @type {Set<() => boolean>} */
	const waiters = new Set();

	return {
		/** @returns {number} current count */
		count: () => count,

		/** Bump the counter and release every waiter whose target is reached. */
		increment: () => {
			count++;
			for (const waiter of [...waiters]) {
				if (waiter()) {
					waiters.delete(waiter);
				}
			}
		},

		/**
		 * Resolve once the counter is at least `n`; reject after `timeoutMs`.
		 * @param {number} n
		 * @param {number} timeoutMs
		 * @returns {Promise<void>}
		 */
		waitFor: (n, timeoutMs) => new Promise((resolve, reject) => {
			if (count >= n) {
				resolve();
				return;
			}
			const waiter = () => {
				if (count < n) {
					return false;
				}
				clearTimeout(timer);
				resolve();
				return true;
			};
			const timer = setTimeout(() => {
				// Drop the waiter so timed-out waits don't pile up.
				waiters.delete(waiter);
				reject(new Error(`Timed out waiting for ${n} ${noun} (got ${count})`));
			}, timeoutMs);
			waiters.add(waiter);
		}),
	};
}

/**
 * Start the mock server on 127.0.0.1 and return a handle.
 *
 * The handle exposes `port`, `url`, `close()`, `requestCount()`,
 * `waitForRequests(n, timeoutMs)`, `completionCount()` and
 * `waitForCompletion(n, timeoutMs)`.
 *
 * @param {number} port - port to listen on; 0 lets the OS pick one
 */
function startServer(port = 0) {
	return new Promise((resolve, reject) => {
		const requests = createWaitableCounter('requests');
		const completions = createWaitableCounter('completions');

		const onCompletion = () => completions.increment();
		serverEvents.on('scenarioCompletion', onCompletion);

		const server = http.createServer((req, res) => {
			// Count (and release waiters) before the request is handled.
			requests.increment();
			handleRequest(req, res);
		});

		server.on('error', err => {
			// Don't leave the listener behind on the shared emitter.
			serverEvents.removeListener('scenarioCompletion', onCompletion);
			reject(err);
		});

		server.listen(port, '127.0.0.1', () => {
			const addr = server.address();
			const actualPort = typeof addr === 'object' && addr ? addr.port : port;
			resolve({
				port: actualPort,
				url: `http://127.0.0.1:${actualPort}`,
				close: () => /** @type {Promise<void>} */ (new Promise((resolveClose, rejectClose) => {
					serverEvents.removeListener('scenarioCompletion', onCompletion);
					server.close(err => err ? rejectClose(err) : resolveClose(undefined));
				})),
				/** Return total request count. */
				requestCount: requests.count,
				/**
				 * Wait until at least `n` requests have been received.
				 * @param {number} n
				 * @param {number} timeoutMs
				 * @returns {Promise<void>}
				 */
				waitForRequests: requests.waitFor,
				/** Return total scenario-completion count. */
				completionCount: completions.count,
				/**
				 * Wait until at least `n` scenario chat completions have been served.
				 * @param {number} n
				 * @param {number} timeoutMs
				 * @returns {Promise<void>}
				 */
				waitForCompletion: completions.waitFor,
			});
		});
	});
}

/**
 * Get the user follow-up messages for a scenario, in order.
 * Returns an array of { message, afterModelTurn } objects where afterModelTurn
 * is the number of model turns that precede the user turn in the scenario.
 * Content-only and unknown scenarios yield an empty array.
 *
 * @param {string} scenarioId
 * @returns {Array<{ message: string, afterModelTurn: number }>}
 */
function getUserTurns(scenarioId) {
	const scenario = findScenario(scenarioId);
	if (!isMultiTurnScenario(scenario)) {
		return [];
	}
	/** @type {Array<{ message: string, afterModelTurn: number }>} */
	const result = [];
	let modelTurnsSeen = 0;
	for (const turn of scenario.turns) {
		if (turn.kind === 'user') {
			result.push({ message: turn.message, afterModelTurn: modelTurnsSeen });
		} else {
			modelTurnsSeen++;
		}
	}
	return result;
}

/**
 * Get the total number of model turns (non-user turns) in a scenario.
 * Content-only and unknown scenarios count as a single turn.
 *
 * @param {string} scenarioId
 * @returns {number}
 */
function getModelTurnCount(scenarioId) {
	const scenario = findScenario(scenarioId);
	if (!isMultiTurnScenario(scenario)) {
		return 1;
	}
	return scenario.turns.filter(t => t.kind !== 'user').length;
}

/**
 * Register a scenario dynamically. Test files call this to add
 * scenarios that are only relevant to them. Registering an existing ID
 * replaces the previous definition.
 *
 * @param {string} id - unique scenario identifier
 * @param {StreamChunk[] | MultiTurnScenario} definition - scenario data
 */
function registerScenario(id, definition) {
	SCENARIOS[id] = definition;
}

/**
 * Return the IDs of all currently registered scenarios.
 * @returns {string[]}
 */
function getScenarioIds() {
	return Object.keys(SCENARIOS);
}

module.exports = { startServer, SCENARIOS, ScenarioBuilder, registerScenario, getScenarioIds, getUserTurns, getModelTurnCount };

// Allow running standalone for testing: node scripts/mock-llm-server.js
// Kept after `module.exports` so the exports are already populated if
// './perf-scenarios' requires this file back (NOTE(review): presumed, that
// module is not visible from here).
if (require.main === module) {
	const { registerPerfScenarios } = require('./perf-scenarios');
	registerPerfScenarios();
	const port = parseInt(process.argv[2] || '0', 10);
	startServer(port).then((/** @type {any} */ handle) => {
		console.log(`Mock LLM server listening at ${handle.url}`);
		console.log('Scenarios:', Object.keys(SCENARIOS).join(', '));
	}).catch(err => {
		console.error(`Failed to start mock LLM server: ${err && err.stack || err}`);
		process.exitCode = 1;
	});
}