Files
vscode/extensions/copilot/test/base/simulationOptions.ts
T
Ulugbek Abdullaev 6bd7400f1c nes-datagen: generate training data from continuous recordings (#323855)
* utils: document binarySearch

* nes-datagen: generate training data from continuous recordings

Continuous enhanced telemetry now ships sliding-window recordings that, unlike per-request alternative-action recordings, carry no requestTime. The datagen pipeline needs a point to split each recording into edit history before/after, so this adds a pluggable pivot strategy (starting with Random, selectable via --pivot-strategy) and a new continuous/ pipeline module that replays a recording at the chosen pivot to produce a processed row.

Along the way this consolidates the pipeline's error and index handling: a shared WithRowIndex<T> replaces the ad-hoc { originalRowIndex, ... } pairs, per-record processing returns Result<IProcessedRow, Error> instead of field-presence unions, and failures surface as original Error objects (no string round-tripping). The telemetry sender's continuous payload is now the documented IContinuousRecording type.

Co-authored-by: Copilot App <223556219+Copilot@users.noreply.github.com>

* nes-datagen: label alt-action replay errors by originalRowIndex

Address PR review: the alternative-action path mislabeled diagnostics when
earlier records failed to parse.

- processAllRows: push replay errors with the row's true `originalRowIndex`
  instead of its position in the filtered `rows` array (parse failures make
  `rows` sparse, so the two diverge).
- loadAndProduceProcessedRows: resolve `languageForRow` via an
  `originalRowIndex`-keyed Map rather than positional `rows[i]`, matching how
  callers pass `e.originalRowIndex`.
- Clarify the `recordCount` doc: it counts successfully-parsed records (parse
  failures are counted separately in `parseErrors`).
- Add a regression spec asserting replay errors carry the row index, not the
  array position.

Co-authored-by: Copilot App <223556219+Copilot@users.noreply.github.com>

---------

Co-authored-by: Copilot App <223556219+Copilot@users.noreply.github.com>
2026-07-01 18:41:11 +05:00

468 lines
23 KiB
TypeScript

/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import minimist from 'minimist';
import { EmbeddingType } from '../../src/platform/embeddings/common/embeddingsComputer';
import { CacheMode } from './simulationContext';
/**
* Number of runs that are stored in baseline.json.
*
* Also used as the number of runs per scenario when `--update-baseline` or
* `--ci` is passed without an explicit numeric `--n`.
*/
export const BASELINE_RUN_COUNT = 10;
/**
* Which kind of training sample nes-datagen generates for each input row.
* Selected with `--sample-task`; `Xtab` is used when the flag is omitted.
*/
export enum NesDatagenSampleTask {
/** Edit-prediction sample (assistant = an edit). */
Xtab = 'xtab',
/** Next-cursor-line sample restricted to the active file. */
CursorSameFile = 'cursor-same-file',
/** Next-cursor-line sample for a jump to another file. */
CursorCrossFile = 'cursor-cross-file',
/** Tries same-file first, falls back to cross-file (one sample per row). */
CursorBoth = 'cursor-both',
}
/**
* Shape of the recordings in the nes-datagen input file.
* Selected with `--input-format`; `AlternativeAction` is used when the flag
* is omitted.
*/
export enum NesDatagenInputFormat {
/** Per-request "alternative action" recordings bookmarked at the NES request time. */
AlternativeAction = 'alternative-action',
/** Continuous enhanced-telemetry slices with no request bookmark; a pivot is synthesized. */
Continuous = 'continuous',
}
/**
* How to choose the pivot in a continuous recording (only meaningful when
* `--input-format=continuous`). The pivot splits the timeline into context and
* the oracle (next user edit).
*
* Selected with `--pivot-strategy`; `Random` is used when the flag is omitted.
*/
export enum PivotStrategy {
/** Pick a single eligible pivot uniformly at random. */
Random = 'random',
}
/**
* Options of the `nes-datagen` subcommand, populated from the command line
* when the subcommand is present and `--input` is given.
*/
export type NesDatagen = {
/** Value of `--input`: path to the file with the training data recordings. */
readonly input: string;
/**
* Value of `--out`: output path for the generated file, or `undefined` when
* the flag is omitted. NOTE(review): the help text documents a default of
* `<input-path>_output.jsonl`; that fallback is not applied here, so it is
* presumably resolved by the consumer — confirm.
*/
readonly output: string | undefined;
/**
* Value of `--row-offset`; `0` when the flag is omitted or not numeric.
* NOTE(review): presumably the index of the first row handled by this
* process within the full input — confirm against the datagen pipeline.
*/
readonly rowOffset: number;
/**
* Value of `--worker` (defaults to `false`).
* NOTE(review): presumably set on child processes spawned by the parent
* datagen run — confirm.
*/
readonly workerMode: boolean;
/** Which kind of sample to generate (`--sample-task`). */
readonly sampleTask: NesDatagenSampleTask;
/** Shape of the input recordings. */
readonly inputFormat: NesDatagenInputFormat;
/** Pivot selection strategy for continuous recordings. Ignored for alternative-action input. */
readonly pivotStrategy: PivotStrategy;
/**
* Seed for the continuous pivot RNG. Resolved once (random when `--seed` is
* omitted) so it can be propagated to all parallel workers for reproducible
* output. Ignored for alternative-action input.
*/
readonly seed: number;
/** Minimum same-file lines above the request cursor for a move to count as a jump. */
readonly sameFileJumpMinAbove: number;
/** Minimum same-file lines below the request cursor for a move to count as a jump. */
readonly sameFileJumpMinBelow: number;
};
/**
 * Parsed command-line options of the simulation runner.
 *
 * Instances are created through {@link SimulationOptions.fromProcessArgs} or
 * {@link SimulationOptions.fromArray}; the constructor parses the arguments with
 * `minimist`, applies defaults and validates the options that have a restricted
 * set of values. Invalid values make the constructor throw an `Error`.
 */
export class SimulationOptions {

	/** Creates the options from the arguments of the current process (`process.argv`). */
	public static fromProcessArgs(): SimulationOptions {
		return new SimulationOptions(process.argv);
	}

	/**
	 * Creates the options from an explicit argument array. The array must have
	 * the same layout as `process.argv`: its first two entries are skipped.
	 */
	public static fromArray(argv: readonly string[]): SimulationOptions {
		return new SimulationOptions(argv);
	}

	/** Raw `minimist` parse result. */
	private readonly argv: minimist.ParsedArgs;

	/** `--help` */
	public readonly help: boolean;
	/** `--list-models`: list available chat models */
	public readonly listModels: boolean;
	/** `--list-tests`: list tests without running them */
	public readonly listTests: boolean;
	/** `--list-suites` */
	public readonly listSuites: boolean;
	/** `--json`: print output in JSONL format */
	public readonly jsonOutput: boolean;
	/** `--n`: number of times each scenario is run */
	public readonly nRuns: number;
	/** `--model` */
	public readonly chatModel: string | undefined;
	/** `--smart-model` */
	public readonly smartChatModel: string | undefined;
	/** `--fast-model` */
	public readonly fastChatModel: string | undefined;
	/** `--fast-rewrite-model` */
	public readonly fastRewriteModel: string | undefined;
	/** `--summarize-history` (defaults to true) */
	public readonly summarizeHistory: boolean;
	/** `--swebench-prompt` */
	public readonly swebenchPrompt: boolean;
	/** `--embedding-model`, mapped to a well-known embedding type */
	public readonly embeddingType: EmbeddingType | undefined;
	/** `--boost` */
	public readonly boost: boolean;
	/** `--parallelism` / `-p` (defaults to 20) */
	public readonly parallelism: number;
	/** Language model request cache mode, derived from `--skip-cache` / `--require-cache` */
	public readonly lmCacheMode: CacheMode;
	/** Model metadata cache mode, derived from `--skip-model-cache` */
	public readonly modelCacheMode: CacheMode;
	/** Computed resources cache mode, derived from `--skip-resources-cache` */
	public readonly resourcesCacheMode: CacheMode;
	/** `--cache-location` */
	public readonly cachePath: string | undefined;
	/** `--external-baseline`; only valid together with `--external-scenarios` */
	public readonly externalBaseline: string | undefined;
	/** `--external-scenarios`: path to a directory containing scenarios to run */
	public readonly externalScenarios: string | undefined;
	/** `--output`: path to a directory where to generate output */
	public readonly output: string | undefined;
	/** `--inline`: run inline chat external scenarios */
	public readonly inline: boolean;
	/** `--sidebar`: run sidebar chat external scenarios */
	public readonly sidebar: boolean;
	/** `--apply-chat-code-blocks` */
	public readonly applyChatCodeBlocks: boolean;
	/** `--stage-cache-entries` */
	public readonly stageCacheEntries: boolean;
	/** `--ci` */
	public readonly ci: boolean;
	/** `--gc` */
	public readonly gc: boolean;
	/** `--external-cache-layers-path` */
	public readonly externalCacheLayersPath: string | undefined;
	/** `--verbose` */
	public readonly verbose: number | boolean | undefined;
	/** `--grep` */
	public readonly grep: string[] | string | undefined;
	/** `--omit-grep` */
	public readonly omitGrep: string | undefined;
	/** `--heap-snapshots` */
	public readonly heapSnapshots: boolean | string | undefined;

	/** --scenario-test, --scenarioTest Run tests from provided scenario test file name */
	public readonly scenarioTest: string | undefined;

	/** `--update-baseline` / `-u` */
	public readonly isUpdateBaseline: boolean;
	/** True when `--no-fetch` was passed */
	public readonly noFetch: boolean;
	/** True when `--no-cache-pointer` was passed */
	public readonly noCachePointer: boolean;

	/**
	 * A label for the current simulation run, to be displayed in the UI for distinguishing between runs.
	 */
	public readonly label: string;

	/** `--runServerPoweredNesProvider` */
	public readonly runServerPoweredNesProvider: boolean;
	/** `--nes`; a bare `--nes` is treated as `'external'` */
	public readonly nes: 'external' | 'coffe' | undefined;
	/** `--nes-url`; must be used together with `--nes-api-key` */
	public readonly nesUrl: string | undefined;
	/** `--nes-api-key`; must be used together with `--nes-url` */
	public readonly nesApiKey: string | undefined;

	/** Options of the `nes-datagen` subcommand; set only when the subcommand and `--input` are both present */
	public readonly nesDatagen: NesDatagen | undefined;
	/** The subcommand found among the positional arguments, if any */
	public readonly subcommand: 'nes-datagen' | undefined;

	/** `--disable-tools`: names of the tools to disable */
	public readonly disabledTools: Set<string>;

	/** If true, all tests are run in the extension host */
	public readonly inExtensionHost: boolean;

	/** Extensions to ensure are available in the extension host */
	public readonly installExtensions: string[];

	/** Whether to run headless (defaults to true) */
	public readonly headless: boolean;

	/** @internal Only run a single test number */
	public readonly runNumber: number;

	/** If true, runs the stest inline in the scenario's workspace folder (`--scenario-workspace-folder`) */
	public readonly useScenarioWorkspace: boolean;

	/** If true, will try to use code search using our service. */
	public readonly useExperimentalCodeSearchService: boolean;

	/** `--config-file`: path to a JSON file containing configuration options */
	public readonly configFile: string | undefined;
	/** `--model-config-file`: path to a JSON file containing model configuration options */
	public readonly modelConfigFile: string | undefined;

	/**
	 * Path to a JSON file describing an adhoc chat request to send (used by the
	 * simulation workbench "Adhoc request sender" mode). The file contains
	 * `{ system: string; user: string; model: string }`.
	 */
	public readonly adhocRequestFile: string | undefined;

	/**
	 * Parses and validates the command line.
	 *
	 * @param processArgv Arguments laid out like `process.argv`; the first two entries are ignored.
	 * @throws Error when an option has an invalid value or an invalid combination of options is used.
	 */
	protected constructor(processArgv: readonly string[]) {
		const argv = minimist(processArgv.slice(2));
		this.argv = argv;

		this.help = boolean(argv['help'], false);
		this.listModels = boolean(argv['list-models'], false);
		this.listTests = boolean(argv['list-tests'], false);
		this.listSuites = boolean(argv['list-suites'], false);
		this.jsonOutput = boolean(argv['json'], false);
		this.isUpdateBaseline = boolean(argv['update-baseline'] ?? argv['u'], false);
		this.boost = boolean(argv['boost'], false);

		const fetch = boolean(argv['fetch'], true);
		this.noFetch = !fetch; // `--no-fetch` becomes argv[`fetch`] because of how minimist works
		const cachePointer = boolean(argv['cache-pointer'], true);
		this.noCachePointer = !cachePointer; // `--no-cache-pointer` becomes argv[`cache-pointer`] because of how minimist works

		this.nRuns = typeof argv['n'] === 'number' ? argv['n'] : (this.isUpdateBaseline || argv['ci'] ? BASELINE_RUN_COUNT : 10);

		this.chatModel = this.argv['model'];
		this.smartChatModel = this.argv['smart-model'];
		this.fastChatModel = this.argv['fast-model'];
		this.fastRewriteModel = this.argv['fast-rewrite-model'];
		this.summarizeHistory = boolean(argv['summarize-history'], true);
		this.swebenchPrompt = boolean(argv['swebench-prompt'], false);
		this.embeddingType = cliOptionsToWellKnownEmbeddingsType(this.argv['embedding-model']);
		this.parallelism = this.argv['parallelism'] ?? this.argv['p'] ?? 20;

		this.modelCacheMode = this.argv['skip-model-cache'] ? CacheMode.Disable : CacheMode.Default;
		this.lmCacheMode = (
			this.argv['skip-cache'] ? CacheMode.Disable
				: (this.argv['require-cache'] ? CacheMode.Require : CacheMode.Default)
		);
		this.resourcesCacheMode = (
			this.argv['skip-resources-cache'] ? CacheMode.Disable : CacheMode.Default
		);

		this.externalScenarios = this.argv['external-scenarios'];
		this.externalBaseline = this.argv['external-baseline']; // must be set after `externalScenarios`
		this.validateExternalBaseline();

		this.output = this.argv['output'];
		this.cachePath = this.argv['cache-location'];
		this.inline = boolean(this.argv['inline'], false);
		this.sidebar = boolean(this.argv['sidebar'], false);
		this.applyChatCodeBlocks = boolean(this.argv['apply-chat-code-blocks'], false);
		this.stageCacheEntries = boolean(this.argv['stage-cache-entries'], false);
		this.ci = boolean(this.argv['ci'], false);
		this.gc = boolean(this.argv['gc'], false);
		this.externalCacheLayersPath = argv['external-cache-layers-path'];
		this.verbose = this.argv['verbose'];
		this.grep = argv['grep'];
		this.omitGrep = argv['omit-grep'];
		this.heapSnapshots = argv['heap-snapshots'];
		this.scenarioTest = argv['scenarioTest'] ?? argv['scenario-test'];
		this.label = argv['label'] ?? '';
		this.inExtensionHost = boolean(argv['in-extension-host'], false);
		this.installExtensions = SimulationOptions.parseCommaSeparatedList(argv['install-extension'], '--install-extension');
		this.headless = boolean(argv['headless'], true);
		this.runNumber = Number(argv['run-number']) || 0;

		this.runServerPoweredNesProvider = boolean(argv['runServerPoweredNesProvider'], false);
		this.nes = SimulationOptions.validateNesArgument(argv['nes']);
		this.nesUrl = argv['nes-url'];
		// [SuppressMessage("Microsoft.Security", "CS002:SecretInNextLine", Justification="used for local simulation tests")]
		this.nesApiKey = argv['nes-api-key'];
		SimulationOptions.validateNesUrlOverride(this.nesUrl, this.nesApiKey);

		this.disabledTools = new Set(SimulationOptions.parseCommaSeparatedList(argv['disable-tools'], '--disable-tools'));
		this.useScenarioWorkspace = boolean(argv['scenario-workspace-folder'], false);
		this.useExperimentalCodeSearchService = boolean(argv['use-experimental-code-search-service'], false);

		// The subcommand is a positional argument; its options are only parsed when `--input` is given.
		const isNesDatagen = (argv._ as string[]).includes('nes-datagen');
		this.subcommand = isNesDatagen ? 'nes-datagen' : undefined;
		this.nesDatagen = isNesDatagen && argv['input']
			? {
				input: argv['input'],
				output: argv['out'],
				rowOffset: typeof argv['row-offset'] === 'number' ? argv['row-offset'] : 0,
				workerMode: boolean(argv['worker'], false),
				sampleTask: SimulationOptions.validateSampleTask(argv['sample-task']),
				inputFormat: SimulationOptions.validateInputFormat(argv['input-format']),
				pivotStrategy: SimulationOptions.validatePivotStrategy(argv['pivot-strategy']),
				seed: SimulationOptions.resolveSeed(argv['seed']),
				sameFileJumpMinAbove: typeof argv['same-file-jump-min-above'] === 'number' ? argv['same-file-jump-min-above'] : 2,
				sameFileJumpMinBelow: typeof argv['same-file-jump-min-below'] === 'number' ? argv['same-file-jump-min-below'] : 5,
			}
			: undefined;

		this.configFile = argv['config-file'];
		this.modelConfigFile = argv['model-config-file'];
		this.adhocRequestFile = argv['adhoc-request-file'];
	}

	/** Prints the general usage help to the console. */
	public printHelp(): void {
		console.log([
			`Example usages: `,
			` npm run simulate`,
			` npm run simulate -- --external-scenarios=<path> --inline --output=<path>`,
			` npm run simulate -- --external-scenarios=<path> --sidebar --output=<path>`,
			` npm run simulate -- --external-scenarios=<path> --nes --output=<path>`,
			` npm run simulate -- --update-baseline`,
			``,
			` -u, --update-baseline Updates scores in baseline.json if they change as a result of your changes to prompts sent to the model`,
			` --external-scenarios Path to a directory containing scenarios to run`,
			` --inline Run inline chat external scenarios`,
			` --sidebar Run sidebar chat external scenarios`,
			` --nes Run NES external scenarios`,
			` --output Path to a directory where to generate output`,
			` --n Run each scenario N times`,
			` --ci Equivalent to --n=${BASELINE_RUN_COUNT} but throws if the baseline is not up-to-date`,
			` --gc Used with --require-cache to compact cache layers into the baseline cache`,
			` --external-cache-layers-path Used to specify the path to the external cache layers`,
			` --grep Run a test which contains the passed-in string`,
			` --omit-grep Run a test which does not contain the passed-in string`,
			` --embedding-model Specify the model to use for the embedding endpoint`,
			` Values: text3small, metis`,
			` --list-models List available chat models`,
			` --model Specify the model to use for the chat endpoint (use --list-models to see valid options)`,
			` --smart-model Specify the model to use in place of the smarter slower model, i.e GPT 4o`,
			` --fast-model Specify the model to use in place of the faster / less smart model, i.e GPT 4o mini`,
			` --fast-rewrite-model [experimental] Specify the model to use for the fast rewrite endpoint`,
			` -p, --parallelism [experimental] Run tests in parallel (default: 20)`,
			` --skip-cache [experimental] Do not use the cache for language model requests`,
			` --require-cache [experimental] Require cache hits, fail on cache misses`,
			` --regenerate-cache [experimental] Fetch all responses and refresh the cache`,
			` --skip-resources-cache [experimental] Do not use the cache for computed resources`,
			` --skip-model-cache [experimental] Do not use the cache for model metadata`,
			` --stage-cache-entries [experimental] Stage cache files that were used in current simulation run`,
			` --list-tests List tests without running them`,
			` --json Print output in JSONL format`,
			` --verbose Print more information about test and assertion failures`,
			` --scenario-test Run tests from provided scenario test file name, e.g., 'docComment.stest' or 'docComment.stest.ts' (--scenarioTest is supported but will be deprecated in future)`,
			` --no-fetch Do not send requests to the model endpoint (uses cache but doesn't write to it) (useful to make sure prompts are unchanged by observing cache misses)`,
			` --no-cache-pointer [experimental] Do not write files to outcome/`,
			` --label A label for the current simulation run, to be displayed in the UI for distinguishing between runs`,
			` --nes-url To override endpoint URL for NES (must be used with --nes-api-key)`,
			` --nes-api-key API key for endpoint URL provided via NES (must be used with --nes-url)`,
			` --runServerPoweredNesProvider Run stests against the http server powered NES provider (server must be run at port 8001)`,
			` --disable-tools A comma-separated list of tools to disable`,
			` --swebench-prompt Use the headless swebench prompt for agent mode`,
			` --summarize-history Enable experimental conversation history summarization in agent mode`,
			` --scenario-workspace-folder If true, runs the stest inline in the scenario's workspace folder`,
			` --config-file Path to a JSON file containing configuration options`,
			` --model-config-file Path to a JSON file containing model configuration options`,
			``,
			`Subcommands:`,
			` nes-datagen Generate training data from alternative action or continuous recordings`,
			` Run 'npm run simulate -- nes-datagen --help' for options`,
			``,
		].join('\n'));
	}

	/** Prints the usage help of the `nes-datagen` subcommand to the console. */
	public printTrainHelp(): void {
		console.log([
			`Usage: npm run simulate -- --config-file=<path> [global options] nes-datagen --input=<path> [options]`,
			``,
			`Generate training data by replaying alternative action or continuous recordings through the NES prompt pipeline.`,
			`The prompting strategy is read from the model configuration in --config-file.`,
			``,
			`Options:`,
			` --input Path to a JSON or JSON Lines file with training data recordings (required)`,
			` Format is inferred from the extension: .jsonl/.ndjson → JSON Lines, otherwise JSON array`,
			` --out Output path for the JSON Lines file. Default: <input-path>_output.jsonl`,
			` --input-format Shape of the input recordings (default: alternative-action)`,
			` Values: alternative-action, continuous`,
			` alternative-action → per-request recordings bookmarked at the NES request time`,
			` continuous → continuous enhanced-telemetry slices; a pivot is synthesized`,
			` --pivot-strategy How to pick the pivot in a continuous recording (default: random; only for --input-format=continuous)`,
			` Values: random`,
			` random → pick a single eligible pivot uniformly at random`,
			` --seed Integer seed for the continuous pivot RNG (default: random, logged for reproducibility)`,
			` --sample-task Which target to generate (default: xtab)`,
			` Values: xtab, cursor-same-file, cursor-cross-file, cursor-both`,
			` xtab → edit-prediction sample (assistant = an edit)`,
			` cursor-same-file → next-cursor-line sample restricted to the active file`,
			` cursor-cross-file → next-cursor-line sample for a jump to another file`,
			` cursor-both → tries same-file first, falls back to cross-file (one sample per row)`,
			` --same-file-jump-min-above Minimum lines above request cursor for a same-file move to count as a jump (default: 2)`,
			` --same-file-jump-min-below Minimum lines below request cursor for a same-file move to count as a jump (default: 5)`,
			``,
			`Global options (placed before 'nes-datagen'):`,
			` --config-file Path to a JSON config file (required for nes-datagen)`,
			` Must include "github.copilot.chat.inlineEdits.xtabProvider.modelConfiguration"`,
			` with at least { "modelName", "promptingStrategy", "includeTagsInCurrentFile" }`,
			` -p, --parallelism Number of parallel workers (default: 20)`,
			` --verbose Print detailed progress and error information`,
			` --help Show this help message`,
			``,
			`Examples:`,
			` npm run simulate -- --config-file=config.json nes-datagen --input=data.json`,
			` npm run simulate -- --config-file=config.json --parallelism=10 --verbose nes-datagen --input=data.json`,
			` npm run simulate -- --config-file=config.json nes-datagen --input=data.json --sample-task=cursor-same-file`,
			` npm run simulate -- --config-file=config.json nes-datagen --input=data.json --sample-task=cursor-cross-file`,
			` npm run simulate -- --config-file=config.json nes-datagen --input=data.json --sample-task=cursor-both --same-file-jump-min-above=8 --same-file-jump-min-below=8`,
			` npm run simulate -- --config-file=config.json nes-datagen --input=continuous.jsonl --input-format=continuous`,
			` npm run simulate -- --config-file=config.json nes-datagen --input=continuous.jsonl --input-format=continuous --pivot-strategy=random --seed=42`,
			``,
		].join('\n'));
	}

	/**
	 * Ensures `--external-baseline` is only used together with `--external-scenarios`.
	 * @throws Error when an external baseline is given without external scenarios.
	 */
	private validateExternalBaseline() {
		if (this.externalBaseline && !this.externalScenarios) {
			throw new Error('External scenarios must be provided for external baseline to work.');
		}
	}

	/**
	 * Splits the value of a comma-separated list option into its entries.
	 *
	 * minimist yields a string for `--flag=a,b`, a number for `--flag=1`, an
	 * array when the flag is repeated and `true` for a bare `--flag`; calling
	 * `.split` directly on the raw value therefore crashes for anything but a
	 * string.
	 *
	 * @param value Raw value produced by minimist.
	 * @param flag Flag name used in the error message, e.g. `--disable-tools`.
	 * @returns The list entries; empty when the option is absent or falsy.
	 * @throws Error when the flag was passed without a value.
	 */
	private static parseCommaSeparatedList(value: unknown, flag: string): string[] {
		if (!value) {
			return [];
		}
		if (Array.isArray(value)) {
			// the flag was repeated: merge the entries of every occurrence
			return value.flatMap((entry: unknown) => SimulationOptions.parseCommaSeparatedList(entry, flag));
		}
		if (typeof value === 'string' || typeof value === 'number') {
			return String(value).split(',');
		}
		throw new Error(`${flag} expects a comma-separated list of values, but got: ${typeof value}`);
	}

	/**
	 * Validates the value of `--nes`.
	 *
	 * @returns `undefined` when the option is absent or explicitly negated
	 * (`--no-nes`, which minimist parses as `false`), `'external'` for a bare
	 * `--nes`, otherwise the given mode.
	 * @throws Error when the value is neither a boolean nor a supported mode.
	 */
	private static validateNesArgument(nes: unknown): 'external' | 'coffe' | undefined {
		if (nes === undefined || nes === null || nes === false) {
			return undefined;
		}
		if (nes === true) { // this is for backward compat because previously it was possible to just pass `--nes` to run external stests against NES
			return 'external';
		}
		if (typeof nes !== 'string') {
			throw new Error(`--nes must be a string, but got: ${typeof nes}`);
		}
		switch (nes) {
			case 'external':
			case 'coffe':
				return nes;
			default:
				throw new Error(`--nes can only be 'external' or 'coffe', but got: ${nes}`);
		}
	}

	/**
	 * Ensures `--nes-url` and `--nes-api-key` are either both set or both omitted.
	 * @throws Error when only one of the two is provided.
	 */
	private static validateNesUrlOverride(nesUrl: string | undefined, nesApiKey: string | undefined): void {
		if (nesUrl !== undefined && nesApiKey === undefined) {
			throw new Error(`--nes-api-key must be provided when --nes-url is set`);
		}
		if (nesUrl === undefined && nesApiKey !== undefined) {
			throw new Error(`--nes-url must be provided when --nes-api-key is set`);
		}
	}

	/**
	 * Validates an option whose value must be a member of a string enum.
	 *
	 * @param flag Flag name used in error messages, e.g. `--sample-task`.
	 * @param value Raw value produced by minimist.
	 * @param enumObject The string enum listing the allowed values.
	 * @param defaultValue Value returned when the option is absent.
	 * @throws Error when the value is not a string or is not one of the enum values.
	 */
	private static validateEnumOption<T extends string>(flag: string, value: unknown, enumObject: Record<string, T>, defaultValue: T): T {
		if (value === undefined || value === null) {
			return defaultValue;
		}
		if (typeof value !== 'string') {
			throw new Error(`${flag} must be a string, but got: ${typeof value}`);
		}
		const allowed: readonly string[] = Object.values(enumObject);
		if (!allowed.includes(value)) {
			throw new Error(`${flag} must be one of [${allowed.join(', ')}], but got: ${value}`);
		}
		return value as T;
	}

	/** Validates `--sample-task`; defaults to {@link NesDatagenSampleTask.Xtab}. */
	private static validateSampleTask(value: unknown): NesDatagenSampleTask {
		return SimulationOptions.validateEnumOption<NesDatagenSampleTask>('--sample-task', value, NesDatagenSampleTask, NesDatagenSampleTask.Xtab);
	}

	/** Validates `--input-format`; defaults to {@link NesDatagenInputFormat.AlternativeAction}. */
	private static validateInputFormat(value: unknown): NesDatagenInputFormat {
		return SimulationOptions.validateEnumOption<NesDatagenInputFormat>('--input-format', value, NesDatagenInputFormat, NesDatagenInputFormat.AlternativeAction);
	}

	/** Validates `--pivot-strategy`; defaults to {@link PivotStrategy.Random}. */
	private static validatePivotStrategy(value: unknown): PivotStrategy {
		return SimulationOptions.validateEnumOption<PivotStrategy>('--pivot-strategy', value, PivotStrategy, PivotStrategy.Random);
	}

	/**
	 * Resolve the continuous pivot seed. When `--seed` is omitted a random
	 * 32-bit seed is generated so that the parent can log it and propagate it to
	 * every worker, keeping output reproducible.
	 *
	 * An explicit seed is reduced to an unsigned 32-bit integer, so negative or
	 * oversized values wrap around (e.g. `-1` becomes `4294967295`).
	 *
	 * @throws Error when the value is not an integer.
	 */
	private static resolveSeed(value: unknown): number {
		if (value === undefined || value === null) {
			return Math.floor(Math.random() * 0x100000000);
		}
		if (typeof value !== 'number' || !Number.isInteger(value)) {
			throw new Error(`--seed must be an integer, but got: ${value}`);
		}
		return value >>> 0;
	}
}
/**
 * Maps the value of `--embedding-model` to a well-known embedding type.
 *
 * Both the short aliases (`text3small`, `metis`) and the ids of the
 * corresponding `EmbeddingType` instances are accepted.
 *
 * @param model Raw option value, or `undefined` when the option was omitted.
 * @returns The matching embedding type, or `undefined` when no model was given.
 * @throws Error when the value does not name a known embedding model.
 */
function cliOptionsToWellKnownEmbeddingsType(model: string | undefined): EmbeddingType | undefined {
	if (model === undefined) {
		return undefined;
	}
	if (model === 'text3small' || model === EmbeddingType.text3small_512.id) {
		return EmbeddingType.text3small_512;
	}
	if (model === 'metis' || model === EmbeddingType.metis_1024_I16_Binary.id) {
		return EmbeddingType.metis_1024_I16_Binary;
	}
	throw new Error(`Unknown embedding model: ${model}`);
}
/**
 * Interprets a raw command-line value as a boolean flag.
 *
 * @param value Raw value as produced by the argument parser.
 * @param defaultValue Result used when the value is `undefined` (option not passed).
 * @returns `false` for the literal string `'false'`, otherwise the truthiness of `value`.
 */
function boolean(value: any, defaultValue: boolean): boolean {
	switch (value) {
		case undefined:
			// option not present on the command line
			return defaultValue;
		case 'false':
			// `--flag=false` arrives as a (truthy) string, so it needs special handling
			return false;
		default:
			return Boolean(value);
	}
}