diff --git a/extensions/copilot/package.json b/extensions/copilot/package.json index a68dd9da48b..c139f0adcd7 100644 --- a/extensions/copilot/package.json +++ b/extensions/copilot/package.json @@ -1051,6 +1051,38 @@ ] } }, + { + "name": "copilot_githubTextSearch", + "legacyToolReferenceFullNames": [ + "githubTextSearch" + ], + "toolReferenceName": "githubTextSearch", + "displayName": "%github.copilot.tools.githubTextSearch.name%", + "modelDescription": "Lexically searches a GitHub repository or organization for files containing specific keywords or code patterns. Use this when looking for exact strings, function names, or identifiers in a GitHub repo or org. Unlike the semantic search tool, this uses keyword matching rather than meaning-based search.", + "userDescription": "%github.copilot.tools.githubTextSearch.userDescription%", + "icon": "$(search)", + "inputSchema": { + "type": "object", + "properties": { + "scope": { + "type": "string", + "description": "The GitHub scope to search. Use 'owner/repo' to search a single repository, or an org name (no slash) to search across an entire organization." + }, + "query": { + "type": "string", + "description": "The keyword search query. Supports GitHub code search syntax such as 'language:typescript', 'extension:ts', 'path:src/', etc." + }, + "maxResults": { + "type": "number", + "description": "Optional. The maximum number of search results to return. Defaults to 100." + } + }, + "required": [ + "scope", + "query" + ] + } + }, { "name": "copilot_switchAgent", "toolReferenceName": "switchAgent", @@ -1271,7 +1303,8 @@ "icon": "$(globe)", "tools": [ "fetch", - "githubRepo" + "githubRepo", + "githubTextSearch" ] } ], diff --git a/extensions/copilot/package.nls.json b/extensions/copilot/package.nls.json index 122540aab72..ade5bdaa461 100644 --- a/extensions/copilot/package.nls.json +++ b/extensions/copilot/package.nls.json @@ -274,8 +274,10 @@ "github.copilot.tools.searchResults.name": "Search View Results", "github.copilot.tools.searchResults.description": "Get the results of the search view", "github.copilot.config.getSearchViewResultsSkill.enabled": "Enable the Search View Results skill and disable the corresponding tool.", - "github.copilot.tools.githubRepo.name": "Search GitHub Repository", - "github.copilot.tools.githubRepo.userDescription": "Search a GitHub repository for relevant source code snippets. You can specify a repository using `owner/repo`", + "github.copilot.tools.githubRepo.name": "Semantic Search GitHub Repository", + "github.copilot.tools.githubRepo.userDescription": "Semantic Search a GitHub repository for relevant source code snippets. You can specify a repository using `owner/repo`", + "github.copilot.tools.githubTextSearch.name": "GitHub Text Search", + "github.copilot.tools.githubTextSearch.userDescription": "Text search a GitHub repository or organization for files containing specific keywords or code patterns.", "github.copilot.config.autoFix": "Automatically fix diagnostics for edited files.", "github.copilot.config.rateLimitAutoSwitchToAuto": "Automatically switch to the Auto model and retry when you hit a per-model rate limit.", "github.copilot.tools.createNewWorkspace.userDescription": "Scaffold a new workspace in VS Code", diff --git a/extensions/copilot/src/extension/prompts/node/agent/test/__snapshots__/agentPrompts-claude-opus-4.5/all_non_edit_tools.spec.snap b/extensions/copilot/src/extension/prompts/node/agent/test/__snapshots__/agentPrompts-claude-opus-4.5/all_non_edit_tools.spec.snap index bcb792fd42c..afbfef22b30 100644 --- a/extensions/copilot/src/extension/prompts/node/agent/test/__snapshots__/agentPrompts-claude-opus-4.5/all_non_edit_tools.spec.snap +++ b/extensions/copilot/src/extension/prompts/node/agent/test/__snapshots__/agentPrompts-claude-opus-4.5/all_non_edit_tools.spec.snap @@ -91,6 +91,7 @@ get_project_setup_info get_search_view_results get_vscode_api github_repo +github_text_search install_extension read_notebook_cell_output read_project_structure diff --git a/extensions/copilot/src/extension/prompts/node/agent/test/__snapshots__/agentPrompts-claude-opus-4.5/all_tools.spec.snap b/extensions/copilot/src/extension/prompts/node/agent/test/__snapshots__/agentPrompts-claude-opus-4.5/all_tools.spec.snap index 85814394368..4743542bdf0 100644 --- a/extensions/copilot/src/extension/prompts/node/agent/test/__snapshots__/agentPrompts-claude-opus-4.5/all_tools.spec.snap +++ b/extensions/copilot/src/extension/prompts/node/agent/test/__snapshots__/agentPrompts-claude-opus-4.5/all_tools.spec.snap @@ -90,6 +90,7 @@ get_project_setup_info get_search_view_results get_vscode_api github_repo +github_text_search install_extension read_notebook_cell_output read_project_structure diff --git a/extensions/copilot/src/extension/prompts/node/agent/test/__snapshots__/agentPrompts-claude-opus-4.6/all_non_edit_tools.spec.snap b/extensions/copilot/src/extension/prompts/node/agent/test/__snapshots__/agentPrompts-claude-opus-4.6/all_non_edit_tools.spec.snap index a22d84d1ebe..3ba1c3a97d2 100644 --- a/extensions/copilot/src/extension/prompts/node/agent/test/__snapshots__/agentPrompts-claude-opus-4.6/all_non_edit_tools.spec.snap +++ b/extensions/copilot/src/extension/prompts/node/agent/test/__snapshots__/agentPrompts-claude-opus-4.6/all_non_edit_tools.spec.snap @@ -73,6 +73,7 @@ get_project_setup_info get_search_view_results get_vscode_api github_repo +github_text_search install_extension read_notebook_cell_output read_project_structure diff --git a/extensions/copilot/src/extension/prompts/node/agent/test/__snapshots__/agentPrompts-claude-opus-4.6/all_tools.spec.snap b/extensions/copilot/src/extension/prompts/node/agent/test/__snapshots__/agentPrompts-claude-opus-4.6/all_tools.spec.snap index 0aa2cd1657c..6c10dbbdb81 100644 --- a/extensions/copilot/src/extension/prompts/node/agent/test/__snapshots__/agentPrompts-claude-opus-4.6/all_tools.spec.snap +++ b/extensions/copilot/src/extension/prompts/node/agent/test/__snapshots__/agentPrompts-claude-opus-4.6/all_tools.spec.snap @@ -73,6 +73,7 @@ get_project_setup_info get_search_view_results get_vscode_api github_repo +github_text_search install_extension read_notebook_cell_output read_project_structure diff --git a/extensions/copilot/src/extension/prompts/node/agent/test/__snapshots__/agentPrompts-claude-sonnet-4.5/all_non_edit_tools.spec.snap b/extensions/copilot/src/extension/prompts/node/agent/test/__snapshots__/agentPrompts-claude-sonnet-4.5/all_non_edit_tools.spec.snap index bcb792fd42c..afbfef22b30 100644 --- a/extensions/copilot/src/extension/prompts/node/agent/test/__snapshots__/agentPrompts-claude-sonnet-4.5/all_non_edit_tools.spec.snap +++ b/extensions/copilot/src/extension/prompts/node/agent/test/__snapshots__/agentPrompts-claude-sonnet-4.5/all_non_edit_tools.spec.snap @@ -91,6 +91,7 @@ get_project_setup_info get_search_view_results get_vscode_api github_repo +github_text_search install_extension read_notebook_cell_output read_project_structure diff --git a/extensions/copilot/src/extension/prompts/node/agent/test/__snapshots__/agentPrompts-claude-sonnet-4.5/all_tools.spec.snap b/extensions/copilot/src/extension/prompts/node/agent/test/__snapshots__/agentPrompts-claude-sonnet-4.5/all_tools.spec.snap index 85814394368..4743542bdf0 100644 --- a/extensions/copilot/src/extension/prompts/node/agent/test/__snapshots__/agentPrompts-claude-sonnet-4.5/all_tools.spec.snap +++ b/extensions/copilot/src/extension/prompts/node/agent/test/__snapshots__/agentPrompts-claude-sonnet-4.5/all_tools.spec.snap @@ -90,6 +90,7 @@ get_project_setup_info get_search_view_results get_vscode_api github_repo +github_text_search install_extension read_notebook_cell_output read_project_structure diff --git a/extensions/copilot/src/extension/prompts/node/agent/test/__snapshots__/agentPrompts-claude-sonnet-4.6/all_non_edit_tools.spec.snap b/extensions/copilot/src/extension/prompts/node/agent/test/__snapshots__/agentPrompts-claude-sonnet-4.6/all_non_edit_tools.spec.snap index ef8381871d9..513c41537c9 100644 --- a/extensions/copilot/src/extension/prompts/node/agent/test/__snapshots__/agentPrompts-claude-sonnet-4.6/all_non_edit_tools.spec.snap +++ b/extensions/copilot/src/extension/prompts/node/agent/test/__snapshots__/agentPrompts-claude-sonnet-4.6/all_non_edit_tools.spec.snap @@ -73,6 +73,7 @@ get_project_setup_info get_search_view_results get_vscode_api github_repo +github_text_search install_extension read_notebook_cell_output read_project_structure diff --git a/extensions/copilot/src/extension/prompts/node/agent/test/__snapshots__/agentPrompts-claude-sonnet-4.6/all_tools.spec.snap b/extensions/copilot/src/extension/prompts/node/agent/test/__snapshots__/agentPrompts-claude-sonnet-4.6/all_tools.spec.snap index 8cc8f00a568..8f118188a40 100644 --- a/extensions/copilot/src/extension/prompts/node/agent/test/__snapshots__/agentPrompts-claude-sonnet-4.6/all_tools.spec.snap +++ b/extensions/copilot/src/extension/prompts/node/agent/test/__snapshots__/agentPrompts-claude-sonnet-4.6/all_tools.spec.snap @@ -73,6 +73,7 @@ get_project_setup_info get_search_view_results get_vscode_api github_repo +github_text_search install_extension read_notebook_cell_output read_project_structure diff --git a/extensions/copilot/src/extension/tools/common/toolNames.ts b/extensions/copilot/src/extension/tools/common/toolNames.ts index d6db8f32bc2..412c1e50e09 100644 --- a/extensions/copilot/src/extension/tools/common/toolNames.ts +++ b/extensions/copilot/src/extension/tools/common/toolNames.ts @@ -47,7 +47,8 @@ export enum ToolName { FindTestFiles = 'test_search', GetProjectSetupInfo = 'get_project_setup_info', SearchViewResults = 'get_search_view_results', - GithubRepo = 'github_repo', + GithubSemanticRepoSearch = 'github_repo', + GithubTextSearch = 'github_text_search', CreateDirectory = 'create_directory', RunVscodeCmd = 'run_vscode_command', CoreManageTodoList = 'manage_todo_list', @@ -132,7 +133,8 @@ export enum ContributedToolName { FindTestFiles = 'copilot_findTestFiles', GetProjectSetupInfo = 'copilot_getProjectSetupInfo', SearchViewResults = 'copilot_getSearchResults', - GithubRepo = 'copilot_githubRepo', + GithubSemanticRepoSearch = 'copilot_githubRepo', + GithubTextSearch = 'copilot_githubTextSearch', CreateAndRunTask = 'copilot_createAndRunTask', CreateDirectory = 'copilot_createDirectory', RunVscodeCmd = 'copilot_runVscodeCommand', @@ -223,7 +225,8 @@ export const toolCategories: Record = { // Web Interaction [ToolName.FetchWebPage]: ToolCategory.WebInteraction, - [ToolName.GithubRepo]: ToolCategory.WebInteraction, + [ToolName.GithubSemanticRepoSearch]: ToolCategory.WebInteraction, + [ToolName.GithubTextSearch]: ToolCategory.WebInteraction, [ToolName.CoreOpenBrowserPage]: ToolCategory.WebInteraction, [ToolName.CoreClickElement]: ToolCategory.WebInteraction, [ToolName.CoreScreenshotPage]: ToolCategory.WebInteraction, diff --git a/extensions/copilot/src/extension/tools/node/allTools.ts b/extensions/copilot/src/extension/tools/node/allTools.ts index 22d768a4b84..8f1170fb9f1 100644 --- a/extensions/copilot/src/extension/tools/node/allTools.ts +++ b/extensions/copilot/src/extension/tools/node/allTools.ts @@ -15,7 +15,8 @@ import './findTextInFilesTool'; import './getErrorsTool'; import './getNotebookCellOutputTool'; import './getSearchViewResultsTool'; -import './githubRepoTool'; +import './githubRepoSemanticSearchTool.tsx'; +import './githubTextSearchTool'; import './insertEditTool'; import './installExtensionTool'; import './listDirTool'; diff --git a/extensions/copilot/src/extension/tools/node/githubRepoTool.tsx b/extensions/copilot/src/extension/tools/node/githubRepoSemanticSearchTool.tsx similarity index 91% rename from extensions/copilot/src/extension/tools/node/githubRepoTool.tsx rename to extensions/copilot/src/extension/tools/node/githubRepoSemanticSearchTool.tsx index 09c83d6022e..2b17061d371 100644 --- a/extensions/copilot/src/extension/tools/node/githubRepoTool.tsx +++ b/extensions/copilot/src/extension/tools/node/githubRepoSemanticSearchTool.tsx @@ -38,9 +38,8 @@ interface PrepareError { readonly details?: string; } -export class GithubRepoTool implements ICopilotTool { - public static readonly toolName = ToolName.GithubRepo; - +export class GithubRepoSemanticSearchTool implements ICopilotTool { + public static readonly toolName = ToolName.GithubSemanticRepoSearch; constructor( @IRunCommandExecutionService _commandService: IRunCommandExecutionService, @@ -61,14 +60,15 @@ export class GithubRepoTool implements ICopilotTool { throw new Error('No embedding models available'); } - const searchResults = await this._githubCodeSearch.searchRepo({ silent: true }, embeddingType, { githubRepoId, localRepoRoot: undefined, indexedCommit: undefined }, options.input.query, 64, {}, new TelemetryCorrelationId('github-repo-tool'), token); + const searchResults = await this._githubCodeSearch.semanticSearch({ silent: true }, embeddingType, { kind: 'repo', githubRepoId, localRepoRoot: undefined, indexedCommit: undefined }, options.input.query, 64, {}, new TelemetryCorrelationId('github-repo-tool'), token); - // Map the chunks to URIs - // TODO: Won't work for proxima or branches not called main + // Map the chunks to URIs using the remote URL and ref from the search response + const repoBaseUrl = searchResults.remoteUrl ?? `https://github.com/${toGithubNwo(githubRepoId)}`; + const ref = searchResults.refName ?? 'main'; const chunks = searchResults.chunks.map((entry): FileChunkAndScore => ({ chunk: { ...entry.chunk, - file: URI.joinPath(URI.parse('https://github.com'), toGithubNwo(githubRepoId), 'tree', 'main', entry.chunk.file.path).with({ + file: URI.joinPath(URI.parse(repoBaseUrl), 'tree', ref, entry.chunk.file.path).with({ fragment: `L${entry.chunk.range.startLineNumber}-L${entry.chunk.range.endLineNumber}`, }), }, @@ -229,4 +229,4 @@ class GithubChunkSearchResults extends PromptElement { + public static readonly toolName = ToolName.GithubTextSearch; + + constructor( + @IInstantiationService private readonly _instantiationService: IInstantiationService, + @IGithubCodeSearchService private readonly _githubCodeSearch: IGithubCodeSearchService, + ) { } + + async invoke(options: vscode.LanguageModelToolInvocationOptions, token: CancellationToken): Promise { + const scope = parseScope(options.input.scope); + if (!scope) { + throw new Error(l10n.t`Invalid input. Could not parse 'scope' argument`); + } + + const maxResults = options.input.maxResults ?? 100; + + const searchResults = await this._githubCodeSearch.lexicalSearch( + { silent: true }, + scope, + options.input.query, + maxResults, + {}, + new TelemetryCorrelationId('github-text-search-tool'), + token, + ); + + const chunks = searchResults.chunks.map((entry): FileChunkAndScore => { + let file = entry.file; + if (file.scheme === 'githubRepoResult') { + // Path format: /owner/repo/relative/file/path + const parts = file.path.split('/').filter(Boolean); + if (parts.length >= 3) { + const nwo = `${parts[0]}/${parts[1]}`; + const relativePath = parts.slice(2).join('/'); + file = URI.joinPath(URI.parse('https://github.com'), nwo, 'tree', 'main', '/' + relativePath).with({ + fragment: entry.range.startLineNumber > 0 + ? `L${entry.range.startLineNumber}-L${entry.range.endLineNumber}` + : undefined, + }); + } + } + return { chunk: { ...entry, file }, distance: undefined }; + }); + + let references: PromptReference[] = []; + const json = await renderPromptElementJSON(this._instantiationService, GithubTextSearchResults, { + chunks, + referencesOut: references, + }); + const result = new ExtendedLanguageModelToolResult([ + new LanguageModelPromptTsxPart(json), + ]); + + references = getUniqueReferences(references); + const scopeLabel = options.input.scope; + result.toolResultMessage = references.length === 0 ? + new MarkdownString(l10n.t`Searched ${scopeLabel} for "${options.input.query}", no results`) : + references.length === 1 ? + new MarkdownString(l10n.t`Searched ${scopeLabel} for "${options.input.query}", 1 result`) : + new MarkdownString(l10n.t`Searched ${scopeLabel} for "${options.input.query}", ${references.length} results`); + result.toolResultDetails = references + .map(r => r.anchor) + .filter(r => isUri(r) || isLocation(r)); + return result; + } + + async prepareInvocation(options: vscode.LanguageModelToolInvocationPrepareOptions, _token: vscode.CancellationToken): Promise { + if (!options.input.scope) { + throw new Error(l10n.t`Invalid input. No 'scope' argument provided`); + } + if (!parseScope(options.input.scope)) { + throw new Error(l10n.t`Invalid input. Could not parse 'scope' argument`); + } + return { + invocationMessage: l10n.t("Searching '{0}' for '{1}'", options.input.scope, options.input.query), + }; + } +} + +function parseScope(scope: string): GithubCodeSearchScope | undefined { + if (!scope) { + return undefined; + } + if (scope.includes('/')) { + const repoId = GithubRepoId.parse(scope); + if (!repoId) { + return undefined; + } + return { kind: 'repo', githubRepoId: repoId, localRepoRoot: undefined, indexedCommit: undefined }; + } + + return { kind: 'org', org: scope }; +} + +interface GithubTextSearchResultsProps extends BasePromptElementProps { + readonly chunks: FileChunkAndScore[]; + readonly referencesOut: PromptReference[]; +} + +class GithubTextSearchResults extends PromptElement { + override render(_state: void, _sizing: PromptSizing): PromptPiece | undefined { + const references: PromptReference[] = []; + const seenFiles = new Set(); + + const renderedChunks = this.props.chunks + .filter(x => x.chunk.text) + .map(chunk => { + const fileKey = chunk.chunk.file.toString(); + if (!seenFiles.has(fileKey)) { + seenFiles.add(fileKey); + references.push(new PromptReference(chunk.chunk.file)); + } + + const githubInfo = parseGithubFileUrl(chunk.chunk.file); + const displayPath = githubInfo?.path ?? chunk.chunk.file.toString(); + const nwoLabel = githubInfo?.nwo; + + const lineInfo = ` starting at line ${chunk.chunk.range.startLineNumber}`; + + const headerText = nwoLabel + ? `Text match excerpt from \`${nwoLabel}\` in \`${displayPath}\`${lineInfo}:` + : `Text match excerpt in \`${displayPath}\`${lineInfo}:`; + + return + {headerText}
+ {createFencedCodeBlock(getLanguageId(chunk.chunk.file), chunk.chunk.text)}

+
; + }); + + this.props.referencesOut.push(...references); + + return <> + + {renderedChunks} + ; + } +} + +function parseGithubFileUrl(uri: URI): { nwo: string; path: string } | undefined { + if (uri.scheme === 'https' && uri.authority === 'github.com') { + const parts = uri.path.split('/').filter(Boolean); + // Pattern: /owner/repo/tree/branch/...path + if (parts.length >= 4 && parts[2] === 'tree') { + return { + nwo: `${parts[0]}/${parts[1]}`, + path: parts.slice(4).join('/'), + }; + } + } + return undefined; +} + +ToolRegistry.registerTool(GithubTextSearchTool); diff --git a/extensions/copilot/src/platform/github/common/githubAPI.ts b/extensions/copilot/src/platform/github/common/githubAPI.ts index dcd7b8cafc2..86b9893e182 100644 --- a/extensions/copilot/src/platform/github/common/githubAPI.ts +++ b/extensions/copilot/src/platform/github/common/githubAPI.ts @@ -133,6 +133,8 @@ export interface GitHubAPIRequestOptions { version?: string; type?: 'json' | 'text'; userAgent?: string; + accept?: string; + additionalHeaders?: { [key: string]: string }; returnStatusCodeOnError?: boolean; silent404?: boolean; callSite?: string; @@ -147,9 +149,10 @@ export async function makeGitHubAPIRequest( method: 'GET' | 'POST', token: string | undefined, options?: GitHubAPIRequestOptions) { - const { body, version, type = 'json', userAgent, returnStatusCodeOnError = false, silent404 = false, callSite = 'github-api-rest' } = options ?? {}; + const { body, version, type = 'json', userAgent, accept, additionalHeaders, returnStatusCodeOnError = false, silent404 = false, callSite = 'github-api-rest' } = options ?? {}; const headers: { [key: string]: string } = { - 'Accept': 'application/vnd.github+json', + 'Accept': accept ?? 'application/vnd.github+json', + ...additionalHeaders, }; if (token) { headers['Authorization'] = `Bearer ${token}`; diff --git a/extensions/copilot/src/platform/remoteCodeSearch/common/adoCodeSearchService.ts b/extensions/copilot/src/platform/remoteCodeSearch/common/adoCodeSearchService.ts index 7fe8f22869c..9df8122ddee 100644 --- a/extensions/copilot/src/platform/remoteCodeSearch/common/adoCodeSearchService.ts +++ b/extensions/copilot/src/platform/remoteCodeSearch/common/adoCodeSearchService.ts @@ -26,7 +26,7 @@ import { measureExecTime } from '../../log/common/logExecTime'; import { ILogService } from '../../log/common/logService'; import { getRequest, postRequest } from '../../networking/common/networking'; import { ITelemetryService } from '../../telemetry/common/telemetry'; -import { CodeSearchOptions, CodeSearchResult, RemoteCodeSearchError, RemoteCodeSearchIndexState, RemoteCodeSearchIndexStatus } from './remoteCodeSearch'; +import { CodeSearchOptions, RemoteCodeSearchError, RemoteCodeSearchIndexState, RemoteCodeSearchIndexStatus, SemanticCodeSearchResult } from './remoteCodeSearch'; interface ResponseShape { @@ -100,7 +100,7 @@ export interface IAdoCodeSearchService { options: CodeSearchOptions, telemetryInfo: TelemetryCorrelationId, token: CancellationToken, - ): Promise; + ): Promise; } /** @@ -251,7 +251,7 @@ export class AdoCodeSearchService extends Disposable implements IAdoCodeSearchSe options: CodeSearchOptions, telemetryInfo: TelemetryCorrelationId, token: CancellationToken - ): Promise { + ): Promise { const totalSw = new StopWatch(); const authToken = await this.getAdoAuthToken(auth.silent); diff --git a/extensions/copilot/src/platform/remoteCodeSearch/common/githubCodeSearchService.ts b/extensions/copilot/src/platform/remoteCodeSearch/common/githubCodeSearchService.ts index 9ecdcc7041e..52b87af651c 100644 --- a/extensions/copilot/src/platform/remoteCodeSearch/common/githubCodeSearchService.ts +++ b/extensions/copilot/src/platform/remoteCodeSearch/common/githubCodeSearchService.ts @@ -13,19 +13,20 @@ import { URI } from '../../../util/vs/base/common/uri'; import { Range } from '../../../util/vs/editor/common/core/range'; import { createDecorator, IInstantiationService } from '../../../util/vs/platform/instantiation/common/instantiation'; import { IAuthenticationService } from '../../authentication/common/authentication'; -import { FileChunkAndScore } from '../../chunking/common/chunk'; +import { FileChunk, FileChunkAndScore } from '../../chunking/common/chunk'; import { stripChunkTextMetadata, truncateToMaxUtf8Length } from '../../chunking/common/chunkingStringUtils'; import { EmbeddingType } from '../../embeddings/common/embeddingsComputer'; import { ICAPIClientService } from '../../endpoint/common/capiClient'; import { IEnvService } from '../../env/common/envService'; import { GithubRepoId, toGithubNwo } from '../../git/common/gitService'; +import { makeGitHubAPIRequest } from '../../github/common/githubAPI'; import { getGithubMetadataHeaders } from '../../github/common/githubApiFetcherService'; import { IIgnoreService } from '../../ignore/common/ignoreService'; import { ILogService } from '../../log/common/logService'; -import { Response } from '../../networking/common/fetcherService'; +import { IFetcherService, Response } from '../../networking/common/fetcherService'; import { postRequest } from '../../networking/common/networking'; import { ITelemetryService } from '../../telemetry/common/telemetry'; -import { CodeSearchOptions, CodeSearchResult, RemoteCodeSearchError, RemoteCodeSearchIndexState, RemoteCodeSearchIndexStatus } from './remoteCodeSearch'; +import { CodeSearchOptions, LexicalCodeSearchResult, RemoteCodeSearchError, RemoteCodeSearchIndexState, RemoteCodeSearchIndexStatus, SemanticCodeSearchResult } from './remoteCodeSearch'; interface ResponseShape { @@ -46,6 +47,7 @@ type SemanticSearchResult = { location: { path: string; // file path commit_sha: string; + ref_name: string; repo: { nwo: string; url: string; @@ -54,11 +56,19 @@ type SemanticSearchResult = { }; export interface GithubCodeSearchRepoInfo { + readonly kind: 'repo'; readonly githubRepoId: GithubRepoId; readonly localRepoRoot: URI | undefined; readonly indexedCommit: string | undefined; } +export interface GithubCodeSearchOrgInfo { + readonly kind: 'org'; + readonly org: string; +} + +export type GithubCodeSearchScope = GithubCodeSearchRepoInfo | GithubCodeSearchOrgInfo; + export const IGithubCodeSearchService = createDecorator('IGithubCodeSearchService'); export interface IGithubCodeSearchService { @@ -89,16 +99,29 @@ export interface IGithubCodeSearchService { * * The repo must have been indexed first. Make sure to check {@link getRemoteIndexState} or call {@link triggerIndexing}. */ - searchRepo( + semanticSearch( authOptions: { readonly silent: boolean }, embeddingType: EmbeddingType, - repo: GithubCodeSearchRepoInfo, + scope: GithubCodeSearchRepoInfo, query: string, maxResults: number, options: CodeSearchOptions, telemetryInfo: TelemetryCorrelationId, token: CancellationToken, - ): Promise; + ): Promise; + + /** + * Lexical searches a given github repo or org for relevant code snippets + */ + lexicalSearch( + authOptions: { readonly silent: boolean }, + scope: GithubCodeSearchScope, + query: string, + maxResults: number, + options: CodeSearchOptions, + telemetryInfo: TelemetryCorrelationId, + token: CancellationToken, + ): Promise; } export class GithubCodeSearchService implements IGithubCodeSearchService { @@ -109,6 +132,7 @@ export class GithubCodeSearchService implements IGithubCodeSearchService { @IAuthenticationService private readonly _authenticationService: IAuthenticationService, @ICAPIClientService private readonly _capiClientService: ICAPIClientService, @IEnvService private readonly _envService: IEnvService, + @IFetcherService private readonly _fetcherService: IFetcherService, @IIgnoreService private readonly _ignoreService: IIgnoreService, @ILogService private readonly _logService: ILogService, @ITelemetryService private readonly _telemetryService: ITelemetryService, @@ -252,7 +276,7 @@ export class GithubCodeSearchService implements IGithubCodeSearchService { return Result.ok(true); } - async searchRepo( + async semanticSearch( auth: { readonly silent: boolean }, embeddingType: EmbeddingType, repo: GithubCodeSearchRepoInfo, @@ -261,7 +285,7 @@ export class GithubCodeSearchService implements IGithubCodeSearchService { options: CodeSearchOptions, telemetryInfo: TelemetryCorrelationId, token: CancellationToken - ): Promise { + ): Promise { const authToken = await this.getGithubAccessToken(auth.silent); if (!authToken) { throw new Error('No valid auth token'); @@ -342,6 +366,80 @@ export class GithubCodeSearchService implements IGithubCodeSearchService { return result; } + async lexicalSearch( + auth: { readonly silent: boolean }, + scope: GithubCodeSearchScope, + query: string, + maxResults: number, + options: CodeSearchOptions, + telemetryInfo: TelemetryCorrelationId, + token: CancellationToken + ): Promise { + const authToken = await this.getGithubAccessToken(auth.silent); + if (!authToken) { + throw new Error('No valid auth token'); + } + + const scopeQualifier = scope.kind === 'org' ? `org:${scope.org}` : `repo:${toGithubNwo(scope.githubRepoId)}`; + const searchQuery = `${query} ${scopeQualifier}`; + const routeSlug = `search/code?q=${encodeURIComponent(searchQuery)}&per_page=${maxResults}`; + + const body = await raceCancellationError(makeGitHubAPIRequest( + this._fetcherService, + this._logService, + this._telemetryService, + this._capiClientService.dotcomAPIURL, + routeSlug, + 'GET', + authToken, + { + accept: 'application/vnd.github.text-match+json', + additionalHeaders: getGithubMetadataHeaders(telemetryInfo.callTracker, this._envService), + callSite: 'github-code-search-lexical', + }, + ), token); + + if (!body) { + /* __GDPR__ + "githubCodeSearch.lexicalSearch.error" : { + "owner": "mjbvz", + "comment": "Information about failed lexical code searches", + "workspaceSearchSource": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Caller of the search" }, + "workspaceSearchCorrelationId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Correlation id for the search" } + } + */ + this._telemetryService.sendMSFTTelemetryEvent('githubCodeSearch.lexicalSearch.error', { + workspaceSearchSource: telemetryInfo.callTracker.toString(), + workspaceSearchCorrelationId: telemetryInfo.correlationId, + }); + + throw new Error(`Code search lexical search failed`); + } + if (!Array.isArray(body.items)) { + throw new Error(`Code search lexical search unexpected response json shape`); + } + + const result = await raceCancellationError(parseLexicalSearchResponse(body, scope, options, this._ignoreService), token); + + /* __GDPR__ + "githubCodeSearch.lexicalSearch.success" : { + "owner": "mjbvz", + "comment": "Information about successful lexical code searches", + "workspaceSearchSource": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Caller of the search" }, + "workspaceSearchCorrelationId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Correlation id for the search" }, + "resultCount": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Total number of returned items from the search" } + } + */ + this._telemetryService.sendMSFTTelemetryEvent('githubCodeSearch.lexicalSearch.success', { + workspaceSearchSource: telemetryInfo.callTracker.toString(), + workspaceSearchCorrelationId: telemetryInfo.correlationId, + }, { + resultCount: body.items.length, + }); + + return result; + } + private async getGithubAccessToken(silent: boolean) { return (await this._authenticationService.getGitHubSession('permissive', { silent }))?.accessToken ?? (await this._authenticationService.getGitHubSession('any', { silent }))?.accessToken; @@ -370,7 +468,7 @@ export class GithubCodeSearchService implements IGithubCodeSearchService { } } -export async function parseGithubCodeSearchResponse(body: ResponseShape, repo: GithubCodeSearchRepoInfo, options: CodeSearchOptions & { skipVerifyRepo?: boolean }, ignoreService: IIgnoreService): Promise { +export async function parseGithubCodeSearchResponse(body: ResponseShape, repo: GithubCodeSearchRepoInfo, options: CodeSearchOptions & { skipVerifyRepo?: boolean }, ignoreService: IIgnoreService): Promise { let outOfSync = false; const outChunks: FileChunkAndScore[] = []; @@ -415,5 +513,106 @@ export async function parseGithubCodeSearchResponse(body: ResponseShape, repo: G }); })); - return { chunks: outChunks, outOfSync }; + // Extract the remote URL and ref name from the first result + const firstResult = body.results[0]; + let remoteUrl: string | undefined; + let refName: string | undefined; + if (firstResult) { + // Derive the web URL from the API URL (e.g. https://api.github.com/repos/o/r -> https://github.com/o/r) + const apiUrl = firstResult.location.repo.url; + const nwo = firstResult.location.repo.nwo; + try { + const parsed = URI.parse(apiUrl); + const host = parsed.authority === 'api.github.com' ? 'github.com' : parsed.authority.replace(/^api\./, ''); + remoteUrl = `https://${host}/${nwo}`; + } catch { + // Fall back to constructing from nwo + remoteUrl = `https://github.com/${nwo}`; + } + + // Extract branch name from ref_name (e.g. "refs/heads/main" -> "main") + const rawRef = firstResult.location.ref_name; + if (rawRef?.startsWith('refs/heads/')) { + refName = rawRef.slice('refs/heads/'.length); + } else if (rawRef) { + refName = rawRef; + } + } + + return { chunks: outChunks, outOfSync, remoteUrl, refName }; +} + +interface LexicalSearchResponseShape { + readonly total_count: number; + readonly incomplete_results: boolean; + readonly items: readonly LexicalSearchItem[]; +} + +type LexicalSearchItem = { + readonly path: string; + readonly repository: { + readonly full_name: string; + }; + readonly text_matches?: readonly { + readonly fragment: string; + readonly matches: readonly { readonly text: string; readonly indices: readonly [number, number] }[]; + readonly object_type: string; + readonly property: string; + }[]; + readonly score: number; +}; + +export async function parseLexicalSearchResponse(body: LexicalSearchResponseShape, scope: GithubCodeSearchScope & { skipVerifyRepo?: boolean }, options: CodeSearchOptions & { skipVerifyRepo?: boolean }, ignoreService: IIgnoreService): Promise { + const outChunks: FileChunk[] = []; + + await Promise.all(body.items.map(async (item): Promise => { + if (!options.skipVerifyRepo && scope.kind === 'repo' && item.repository.full_name.toLowerCase() !== toGithubNwo(scope.githubRepoId)) { + return; + } + if (!options.skipVerifyRepo && scope.kind === 'org' && item.repository.full_name.toLowerCase().split('/')[0] !== scope.org.toLowerCase()) { + return; + } + + const localRepoRoot = scope.kind === 'repo' ? scope.localRepoRoot : undefined; + let fileUri: URI; + if (localRepoRoot) { + fileUri = URI.joinPath(localRepoRoot, item.path); + if (await ignoreService.isCopilotIgnored(fileUri)) { + return; + } + } else { + fileUri = URI.from({ + scheme: 'githubRepoResult', + path: '/' + item.repository.full_name + '/' + item.path + }); + } + + if (!shouldInclude(fileUri, options.globPatterns)) { + return; + } + + const textMatches = item.text_matches?.filter(m => m.property === 'content'); + if (textMatches && textMatches.length > 0) { + for (const match of textMatches) { + outChunks.push({ + file: fileUri, + text: match.fragment, + rawText: undefined, + range: new Range(0, 0, 0, 0), + isFullFile: false, + }); + } + } else { + // No text matches, include the file as a whole-file result + outChunks.push({ + file: fileUri, + text: '', + rawText: undefined, + range: new Range(0, 0, 0, 0), + isFullFile: true, + }); + } + })); + + return { chunks: outChunks, outOfSync: false }; } diff --git a/extensions/copilot/src/platform/remoteCodeSearch/common/remoteCodeSearch.ts b/extensions/copilot/src/platform/remoteCodeSearch/common/remoteCodeSearch.ts index e53ba6da08f..3066f82630e 100644 --- a/extensions/copilot/src/platform/remoteCodeSearch/common/remoteCodeSearch.ts +++ b/extensions/copilot/src/platform/remoteCodeSearch/common/remoteCodeSearch.ts @@ -3,7 +3,7 @@ * Licensed under the MIT License. See License.txt in the project root for license information. *--------------------------------------------------------------------------------------------*/ import { GlobIncludeOptions } from '../../../util/common/glob'; -import { FileChunkAndScore } from '../../chunking/common/chunk'; +import { FileChunk, FileChunkAndScore } from '../../chunking/common/chunk'; export enum RemoteCodeSearchIndexStatus { /** The repo index is built and ready to use */ @@ -29,11 +29,23 @@ export type RemoteCodeSearchError = | { readonly type: 'generic-error'; readonly error: Error } ; -export interface CodeSearchResult { - readonly chunks: readonly FileChunkAndScore[]; - +interface BaseCodeSearchResult { /** Tracks if the commit sha code search used differs from the one we used to compute the local diff */ readonly outOfSync: boolean; + + /** The web URL of the remote repo, e.g. `https://github.com/microsoft/vscode` */ + readonly remoteUrl?: string; + + /** The branch name the results are from, e.g. `main` */ + readonly refName?: string; +} + +export interface SemanticCodeSearchResult extends BaseCodeSearchResult { + readonly chunks: readonly FileChunkAndScore[]; +} + +export interface LexicalCodeSearchResult extends BaseCodeSearchResult { + readonly chunks: readonly FileChunk[]; } export interface CodeSearchOptions { diff --git a/extensions/copilot/src/platform/workspaceChunkSearch/node/codeSearch/codeSearchChunkSearch.ts b/extensions/copilot/src/platform/workspaceChunkSearch/node/codeSearch/codeSearchChunkSearch.ts index c148434c729..66cf005867a 100644 --- a/extensions/copilot/src/platform/workspaceChunkSearch/node/codeSearch/codeSearchChunkSearch.ts +++ b/extensions/copilot/src/platform/workspaceChunkSearch/node/codeSearch/codeSearchChunkSearch.ts @@ -33,7 +33,7 @@ import { Change } from '../../../git/vscode/git'; import { logExecTime, LogExecTime } from '../../../log/common/logExecTime'; import { ILogService } from '../../../log/common/logService'; import { IAdoCodeSearchService } from '../../../remoteCodeSearch/common/adoCodeSearchService'; -import { CodeSearchResult } from '../../../remoteCodeSearch/common/remoteCodeSearch'; +import { SemanticCodeSearchResult } from '../../../remoteCodeSearch/common/remoteCodeSearch'; import { ICodeSearchAuthenticationService } from '../../../remoteCodeSearch/node/codeSearchRepoAuth'; import { isGitHubRemoteRepository } from '../../../remoteRepositories/common/utils'; import { IExperimentationService } from '../../../telemetry/common/nullExperimentationService'; @@ -555,13 +555,13 @@ export class CodeSearchChunkSearch extends Disposable { localSearchCts.cancel(); throw e; }) - : Promise.resolve({ chunks: [], outOfSync: false }); + : Promise.resolve({ chunks: [], outOfSync: false }); const localSearchOperation = raceTimeout(this.searchLocalDiff(diffArray, sizing, query, options, innerTelemetryInfo, localSearchCts.token), this.localDiffSearchTimeout, () => { localSearchCts.cancel(); }); - let codeSearchResults: CodeSearchResult | undefined; + let codeSearchResults: SemanticCodeSearchResult | undefined; let localResults: DiffSearchResult | undefined; try { codeSearchResults = await raceCancellationError(codeSearchOperation, token); @@ -720,7 +720,7 @@ export class CodeSearchChunkSearch extends Disposable { */ this._telemetryService.sendMSFTTelemetryEvent('codeSearchChunkSearch.perf.doCodeSearchWithRetry', { status }, { execTime }); }) - private async doCodeSearch(query: WorkspaceChunkQueryWithEmbeddings, repos: readonly CodeSearchRepo[], sizing: StrategySearchSizing, options: WorkspaceChunkSearchOptions, telemetryInfo: TelemetryCorrelationId, token: CancellationToken): Promise { + private async doCodeSearch(query: WorkspaceChunkQueryWithEmbeddings, repos: readonly CodeSearchRepo[], sizing: StrategySearchSizing, options: WorkspaceChunkSearchOptions, telemetryInfo: TelemetryCorrelationId, token: CancellationToken): Promise { const results = await Promise.all(repos.map(repo => { return repo.searchRepo({ silent: true }, this._embeddingType, query.queryText, sizing.maxResultCountHint, options, telemetryInfo, token); })); diff --git a/extensions/copilot/src/platform/workspaceChunkSearch/node/codeSearch/codeSearchRepo.ts b/extensions/copilot/src/platform/workspaceChunkSearch/node/codeSearch/codeSearchRepo.ts index 8de868ca13d..11ad3d58c41 100644 --- a/extensions/copilot/src/platform/workspaceChunkSearch/node/codeSearch/codeSearchRepo.ts +++ b/extensions/copilot/src/platform/workspaceChunkSearch/node/codeSearch/codeSearchRepo.ts @@ -16,7 +16,7 @@ import { measureExecTime } from '../../../log/common/logExecTime'; import { ILogService } from '../../../log/common/logService'; import { IAdoCodeSearchService } from '../../../remoteCodeSearch/common/adoCodeSearchService'; import { IGithubCodeSearchService } from '../../../remoteCodeSearch/common/githubCodeSearchService'; -import { CodeSearchResult, RemoteCodeSearchError, RemoteCodeSearchIndexState, RemoteCodeSearchIndexStatus } from '../../../remoteCodeSearch/common/remoteCodeSearch'; +import { RemoteCodeSearchError, RemoteCodeSearchIndexState, RemoteCodeSearchIndexStatus, SemanticCodeSearchResult } from '../../../remoteCodeSearch/common/remoteCodeSearch'; import { ITelemetryService } from '../../../telemetry/common/telemetry'; import { WorkspaceChunkSearchOptions } from '../../common/workspaceChunkSearch'; import { RepoInfo } from './repoTracker'; @@ -149,7 +149,7 @@ export interface CodeSearchRepo extends IDisposable { options: WorkspaceChunkSearchOptions, telemetryInfo: TelemetryCorrelationId, token: CancellationToken - ): Promise; + ): Promise; triggerRemoteIndexingOfRepo(triggerReason: BuildIndexTriggerReason, telemetryInfo: TelemetryCorrelationId): Promise>; @@ -232,7 +232,7 @@ abstract class BaseRemoteCodeSearchRepo extends Disposable implements CodeSearch this._onDidChangeStatus.fire(this._state.status); } - public abstract searchRepo(authOptions: { silent: boolean }, embeddingType: EmbeddingType, resolvedQuery: string, maxResultCountHint: number, options: WorkspaceChunkSearchOptions, telemetryInfo: TelemetryCorrelationId, token: CancellationToken): Promise; + public abstract searchRepo(authOptions: { silent: boolean }, embeddingType: EmbeddingType, resolvedQuery: string, maxResultCountHint: number, options: WorkspaceChunkSearchOptions, telemetryInfo: TelemetryCorrelationId, token: CancellationToken): Promise; public abstract triggerRemoteIndexingOfRepo(triggerReason: BuildIndexTriggerReason, telemetryInfo: TelemetryCorrelationId): Promise>; public abstract prepareSearch(telemetryInfo: TelemetryCorrelationId, token: CancellationToken): Promise; @@ -381,8 +381,9 @@ export class GithubCodeSearchRepo extends BaseRemoteCodeSearchRepo { super(repoInfo, remoteInfo, logService, telemetryService); } - public override async searchRepo(authOptions: { silent: boolean }, embeddingType: EmbeddingType, resolvedQuery: string, maxResultCountHint: number, options: WorkspaceChunkSearchOptions, telemetryInfo: TelemetryCorrelationId, token: CancellationToken): Promise { - const result = await this._githubCodeSearchService.searchRepo(authOptions, embeddingType, { + public override async searchRepo(authOptions: { silent: boolean }, embeddingType: EmbeddingType, resolvedQuery: string, maxResultCountHint: number, options: WorkspaceChunkSearchOptions, telemetryInfo: TelemetryCorrelationId, token: CancellationToken): Promise { + const result = await this._githubCodeSearchService.semanticSearch(authOptions, embeddingType, { + kind: 'repo', githubRepoId: this._githubRepoId, localRepoRoot: this.repoInfo.rootUri, indexedCommit: undefined, // TODO @@ -502,7 +503,7 @@ export class AdoCodeSearchRepo extends BaseRemoteCodeSearchRepo { super(repoInfo, remoteInfo, logService, telemetryService); } - public searchRepo(authOptions: { silent: boolean }, _embeddingType: EmbeddingType, resolvedQuery: string, maxResultCountHint: number, options: WorkspaceChunkSearchOptions, telemetryInfo: TelemetryCorrelationId, token: CancellationToken): Promise { + public searchRepo(authOptions: { silent: boolean }, _embeddingType: EmbeddingType, resolvedQuery: string, maxResultCountHint: number, options: WorkspaceChunkSearchOptions, telemetryInfo: TelemetryCorrelationId, token: CancellationToken): Promise { return this._adoCodeSearchService.searchRepo(authOptions, { adoRepoId: this._adoRepoId, localRepoRoot: this.repoInfo.rootUri, diff --git a/extensions/copilot/test/base/simuliationWorkspaceChunkSearch.ts b/extensions/copilot/test/base/simuliationWorkspaceChunkSearch.ts index 1bfd2125363..c79ae2e2384 100644 --- a/extensions/copilot/test/base/simuliationWorkspaceChunkSearch.ts +++ b/extensions/copilot/test/base/simuliationWorkspaceChunkSearch.ts @@ -7,8 +7,8 @@ import { EmbeddingType } from '../../src/platform/embeddings/common/embeddingsCo import { GithubRepoId } from '../../src/platform/git/common/gitService'; import { IIgnoreService } from '../../src/platform/ignore/common/ignoreService'; import { ILogService } from '../../src/platform/log/common/logService'; -import { GithubCodeSearchRepoInfo, IGithubCodeSearchService, parseGithubCodeSearchResponse } from '../../src/platform/remoteCodeSearch/common/githubCodeSearchService'; -import { CodeSearchResult, RemoteCodeSearchError, RemoteCodeSearchIndexState, RemoteCodeSearchIndexStatus } from '../../src/platform/remoteCodeSearch/common/remoteCodeSearch'; +import { GithubCodeSearchScope, IGithubCodeSearchService, parseGithubCodeSearchResponse } from '../../src/platform/remoteCodeSearch/common/githubCodeSearchService'; +import { LexicalCodeSearchResult, RemoteCodeSearchError, RemoteCodeSearchIndexState, RemoteCodeSearchIndexStatus, SemanticCodeSearchResult } from '../../src/platform/remoteCodeSearch/common/remoteCodeSearch'; import { WorkspaceChunkQuery, WorkspaceChunkSearchOptions } from '../../src/platform/workspaceChunkSearch/common/workspaceChunkSearch'; import { BuildIndexTriggerReason, TriggerIndexingError } from '../../src/platform/workspaceChunkSearch/node/codeSearch/codeSearchRepo'; import { IWorkspaceChunkSearchService, WorkspaceChunkSearchResult, WorkspaceChunkSearchSizing, WorkspaceIndexState } from '../../src/platform/workspaceChunkSearch/node/workspaceChunkSearchService'; @@ -34,7 +34,11 @@ class SimulationGithubCodeSearchService extends Disposable implements IGithubCod super(); } - async searchRepo(authOptions: { silent: boolean }, embeddingType: EmbeddingType, repo: GithubCodeSearchRepoInfo, query: string, maxResults: number, options: WorkspaceChunkSearchOptions, _telemetryInfo: TelemetryCorrelationId, token: CancellationToken): Promise { + async lexicalSearch(_authOptions: { silent: boolean }, _scope: GithubCodeSearchScope, _query: string, _maxResults: number, _options: WorkspaceChunkSearchOptions, _telemetryInfo: TelemetryCorrelationId, _token: CancellationToken): Promise { + throw new Error('Method not implemented.'); + } + + async semanticSearch(authOptions: { silent: boolean }, embeddingType: EmbeddingType, repo: GithubCodeSearchScope & { kind: 'repo' }, query: string, maxResults: number, options: WorkspaceChunkSearchOptions, _telemetryInfo: TelemetryCorrelationId, token: CancellationToken): Promise { this._logService.trace(`SimulationGithubCodeSearchService::searchRepo(${repo.githubRepoId}, ${query})`); const response = await fetch(searchEndpoint, { method: 'POST', @@ -97,7 +101,8 @@ export class SimulationCodeSearchChunkSearchService extends Disposable implement async searchFileChunks(sizing: WorkspaceChunkSearchSizing, query: WorkspaceChunkQuery, options: WorkspaceChunkSearchOptions, telemetryInfo: TelemetryCorrelationId, progress: Progress | undefined, token: CancellationToken): Promise { const repo = new GithubRepoId('test-org', 'test-repo'); try { - const results = await this._githubCodeSearchService.searchRepo({ silent: true }, EmbeddingType.text3small_512, { + const results = await this._githubCodeSearchService.semanticSearch({ silent: true }, EmbeddingType.text3small_512, { + kind: 'repo', githubRepoId: repo, indexedCommit: undefined, localRepoRoot: undefined,