Merge pull request #312228 from mjbvz/dev/mjbvz/boring-python

Add basic github lexical search
This commit is contained in:
Matt Bierner
2026-04-23 15:08:53 -07:00
committed by GitHub
21 changed files with 498 additions and 48 deletions
+34 -1
View File
@@ -1051,6 +1051,38 @@
]
}
},
{
"name": "copilot_githubTextSearch",
"legacyToolReferenceFullNames": [
"githubTextSearch"
],
"toolReferenceName": "githubTextSearch",
"displayName": "%github.copilot.tools.githubTextSearch.name%",
"modelDescription": "Lexically searches a GitHub repository or organization for files containing specific keywords or code patterns. Use this when looking for exact strings, function names, or identifiers in a GitHub repo or org. Unlike the semantic search tool, this uses keyword matching rather than meaning-based search.",
"userDescription": "%github.copilot.tools.githubTextSearch.userDescription%",
"icon": "$(search)",
"inputSchema": {
"type": "object",
"properties": {
"scope": {
"type": "string",
"description": "The GitHub scope to search. Use 'owner/repo' to search a single repository, or an org name (no slash) to search across an entire organization."
},
"query": {
"type": "string",
"description": "The keyword search query. Supports GitHub code search syntax such as 'language:typescript', 'extension:ts', 'path:src/', etc."
},
"maxResults": {
"type": "number",
"description": "Optional. The maximum number of search results to return. Defaults to 100."
}
},
"required": [
"scope",
"query"
]
}
},
{
"name": "copilot_switchAgent",
"toolReferenceName": "switchAgent",
@@ -1271,7 +1303,8 @@
"icon": "$(globe)",
"tools": [
"fetch",
"githubRepo"
"githubRepo",
"githubTextSearch"
]
}
],
+4 -2
View File
@@ -274,8 +274,10 @@
"github.copilot.tools.searchResults.name": "Search View Results",
"github.copilot.tools.searchResults.description": "Get the results of the search view",
"github.copilot.config.getSearchViewResultsSkill.enabled": "Enable the Search View Results skill and disable the corresponding tool.",
"github.copilot.tools.githubRepo.name": "Search GitHub Repository",
"github.copilot.tools.githubRepo.userDescription": "Search a GitHub repository for relevant source code snippets. You can specify a repository using `owner/repo`",
"github.copilot.tools.githubRepo.name": "Semantic Search GitHub Repository",
"github.copilot.tools.githubRepo.userDescription": "Semantic Search a GitHub repository for relevant source code snippets. You can specify a repository using `owner/repo`",
"github.copilot.tools.githubTextSearch.name": "GitHub Text Search",
"github.copilot.tools.githubTextSearch.userDescription": "Text search a GitHub repository or organization for files containing specific keywords or code patterns.",
"github.copilot.config.autoFix": "Automatically fix diagnostics for edited files.",
"github.copilot.config.rateLimitAutoSwitchToAuto": "Automatically switch to the Auto model and retry when you hit a per-model rate limit.",
"github.copilot.tools.createNewWorkspace.userDescription": "Scaffold a new workspace in VS Code",
@@ -91,6 +91,7 @@ get_project_setup_info
get_search_view_results
get_vscode_api
github_repo
github_text_search
install_extension
read_notebook_cell_output
read_project_structure
@@ -90,6 +90,7 @@ get_project_setup_info
get_search_view_results
get_vscode_api
github_repo
github_text_search
install_extension
read_notebook_cell_output
read_project_structure
@@ -73,6 +73,7 @@ get_project_setup_info
get_search_view_results
get_vscode_api
github_repo
github_text_search
install_extension
read_notebook_cell_output
read_project_structure
@@ -73,6 +73,7 @@ get_project_setup_info
get_search_view_results
get_vscode_api
github_repo
github_text_search
install_extension
read_notebook_cell_output
read_project_structure
@@ -91,6 +91,7 @@ get_project_setup_info
get_search_view_results
get_vscode_api
github_repo
github_text_search
install_extension
read_notebook_cell_output
read_project_structure
@@ -90,6 +90,7 @@ get_project_setup_info
get_search_view_results
get_vscode_api
github_repo
github_text_search
install_extension
read_notebook_cell_output
read_project_structure
@@ -73,6 +73,7 @@ get_project_setup_info
get_search_view_results
get_vscode_api
github_repo
github_text_search
install_extension
read_notebook_cell_output
read_project_structure
@@ -73,6 +73,7 @@ get_project_setup_info
get_search_view_results
get_vscode_api
github_repo
github_text_search
install_extension
read_notebook_cell_output
read_project_structure
@@ -47,7 +47,8 @@ export enum ToolName {
FindTestFiles = 'test_search',
GetProjectSetupInfo = 'get_project_setup_info',
SearchViewResults = 'get_search_view_results',
GithubRepo = 'github_repo',
GithubSemanticRepoSearch = 'github_repo',
GithubTextSearch = 'github_text_search',
CreateDirectory = 'create_directory',
RunVscodeCmd = 'run_vscode_command',
CoreManageTodoList = 'manage_todo_list',
@@ -132,7 +133,8 @@ export enum ContributedToolName {
FindTestFiles = 'copilot_findTestFiles',
GetProjectSetupInfo = 'copilot_getProjectSetupInfo',
SearchViewResults = 'copilot_getSearchResults',
GithubRepo = 'copilot_githubRepo',
GithubSemanticRepoSearch = 'copilot_githubRepo',
GithubTextSearch = 'copilot_githubTextSearch',
CreateAndRunTask = 'copilot_createAndRunTask',
CreateDirectory = 'copilot_createDirectory',
RunVscodeCmd = 'copilot_runVscodeCommand',
@@ -223,7 +225,8 @@ export const toolCategories: Record<ToolName, ToolCategory> = {
// Web Interaction
[ToolName.FetchWebPage]: ToolCategory.WebInteraction,
[ToolName.GithubRepo]: ToolCategory.WebInteraction,
[ToolName.GithubSemanticRepoSearch]: ToolCategory.WebInteraction,
[ToolName.GithubTextSearch]: ToolCategory.WebInteraction,
[ToolName.CoreOpenBrowserPage]: ToolCategory.WebInteraction,
[ToolName.CoreClickElement]: ToolCategory.WebInteraction,
[ToolName.CoreScreenshotPage]: ToolCategory.WebInteraction,
@@ -15,7 +15,8 @@ import './findTextInFilesTool';
import './getErrorsTool';
import './getNotebookCellOutputTool';
import './getSearchViewResultsTool';
import './githubRepoTool';
import './githubRepoSemanticSearchTool.tsx';
import './githubTextSearchTool';
import './insertEditTool';
import './installExtensionTool';
import './listDirTool';
@@ -38,9 +38,8 @@ interface PrepareError {
readonly details?: string;
}
export class GithubRepoTool implements ICopilotTool<GithubRepoToolParams> {
public static readonly toolName = ToolName.GithubRepo;
export class GithubRepoSemanticSearchTool implements ICopilotTool<GithubRepoToolParams> {
public static readonly toolName = ToolName.GithubSemanticRepoSearch;
constructor(
@IRunCommandExecutionService _commandService: IRunCommandExecutionService,
@@ -61,14 +60,15 @@ export class GithubRepoTool implements ICopilotTool<GithubRepoToolParams> {
throw new Error('No embedding models available');
}
const searchResults = await this._githubCodeSearch.searchRepo({ silent: true }, embeddingType, { githubRepoId, localRepoRoot: undefined, indexedCommit: undefined }, options.input.query, 64, {}, new TelemetryCorrelationId('github-repo-tool'), token);
const searchResults = await this._githubCodeSearch.semanticSearch({ silent: true }, embeddingType, { kind: 'repo', githubRepoId, localRepoRoot: undefined, indexedCommit: undefined }, options.input.query, 64, {}, new TelemetryCorrelationId('github-repo-tool'), token);
// Map the chunks to URIs
// TODO: Won't work for proxima or branches not called main
// Map the chunks to URIs using the remote URL and ref from the search response
const repoBaseUrl = searchResults.remoteUrl ?? `https://github.com/${toGithubNwo(githubRepoId)}`;
const ref = searchResults.refName ?? 'main';
const chunks = searchResults.chunks.map((entry): FileChunkAndScore => ({
chunk: {
...entry.chunk,
file: URI.joinPath(URI.parse('https://github.com'), toGithubNwo(githubRepoId), 'tree', 'main', entry.chunk.file.path).with({
file: URI.joinPath(URI.parse(repoBaseUrl), 'tree', ref, entry.chunk.file.path).with({
fragment: `L${entry.chunk.range.startLineNumber}-L${entry.chunk.range.endLineNumber}`,
}),
},
@@ -229,4 +229,4 @@ class GithubChunkSearchResults extends PromptElement<GithubChunkSearchResultsPro
}
ToolRegistry.registerTool(GithubRepoTool);
ToolRegistry.registerTool(GithubRepoSemanticSearchTool);
@@ -0,0 +1,183 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import * as l10n from '@vscode/l10n';
import { BasePromptElementProps, PromptElement, PromptPiece, PromptReference, PromptSizing, TextChunk } from '@vscode/prompt-tsx';
import type * as vscode from 'vscode';
import { FileChunkAndScore } from '../../../platform/chunking/common/chunk';
import { GithubRepoId } from '../../../platform/git/common/gitService';
import { GithubCodeSearchScope, IGithubCodeSearchService } from '../../../platform/remoteCodeSearch/common/githubCodeSearchService';
import { createFencedCodeBlock, getLanguageId } from '../../../util/common/markdown';
import { TelemetryCorrelationId } from '../../../util/common/telemetryCorrelationId';
import { isLocation, isUri } from '../../../util/common/types';
import { CancellationToken } from '../../../util/vs/base/common/cancellation';
import { URI } from '../../../util/vs/base/common/uri';
import { IInstantiationService } from '../../../util/vs/platform/instantiation/common/instantiation';
import { ExtendedLanguageModelToolResult, LanguageModelPromptTsxPart, MarkdownString } from '../../../vscodeTypes';
import { getUniqueReferences } from '../../prompt/common/conversation';
import { renderPromptElementJSON } from '../../prompts/node/base/promptRenderer';
import { ToolName } from '../common/toolNames';
import { ICopilotTool, ToolRegistry } from '../common/toolsRegistry';
/**
* Input accepted by the GitHub text search tool.
*/
export interface GithubTextSearchToolParams {
/** The keyword query to search for. */
readonly query: string;
/** Either 'owner/repo' for a single repo, or an org name (no slash) */
readonly scope: string;
/** Maximum number of results to request. The tool falls back to 100 when this is omitted. */
readonly maxResults?: number;
}
/**
 * Language model tool that runs a keyword (lexical) search against a GitHub
 * repository or organization and hands the matching excerpts back to the model.
 */
export class GithubTextSearchTool implements ICopilotTool<GithubTextSearchToolParams> {
	public static readonly toolName = ToolName.GithubTextSearch;

	constructor(
		@IInstantiationService private readonly _instantiationService: IInstantiationService,
		@IGithubCodeSearchService private readonly _githubCodeSearch: IGithubCodeSearchService,
	) { }

	/**
	 * Executes the search and renders the hits as a prompt-tsx tool result.
	 *
	 * @param options Invocation options carrying the `scope`, `query` and optional `maxResults`.
	 * @param token Cancellation token forwarded to the search service.
	 * @returns A tool result holding the rendered excerpts, a summary message and the result anchors.
	 * @throws If the `scope` argument cannot be parsed.
	 */
	async invoke(options: vscode.LanguageModelToolInvocationOptions<GithubTextSearchToolParams>, token: CancellationToken): Promise<vscode.LanguageModelToolResult> {
		const { query, scope: scopeLabel, maxResults } = options.input;

		const searchScope = parseScope(scopeLabel);
		if (!searchScope) {
			throw new Error(l10n.t`Invalid input. Could not parse 'scope' argument`);
		}

		const { chunks: matches } = await this._githubCodeSearch.lexicalSearch(
			{ silent: true },
			searchScope,
			query,
			maxResults ?? 100,
			{},
			new TelemetryCorrelationId('github-text-search-tool'),
			token,
		);

		const chunks = matches.map((entry): FileChunkAndScore => {
			let file = entry.file;
			if (file.scheme === 'githubRepoResult') {
				// The path is laid out as /owner/repo/relative/file/path
				const [owner, repo, ...rest] = file.path.split('/').filter(Boolean);
				if (rest.length > 0) {
					const hasLines = entry.range.startLineNumber > 0;
					file = URI
						.joinPath(URI.parse('https://github.com'), `${owner}/${repo}`, 'tree', 'main', '/' + rest.join('/'))
						.with({
							fragment: hasLines
								? `L${entry.range.startLineNumber}-L${entry.range.endLineNumber}`
								: undefined,
						});
				}
			}
			return { chunk: { ...entry, file }, distance: undefined };
		});

		// The renderer appends the references it emits to this array.
		const collected: PromptReference[] = [];
		const json = await renderPromptElementJSON(this._instantiationService, GithubTextSearchResults, {
			chunks,
			referencesOut: collected,
		});

		const result = new ExtendedLanguageModelToolResult([
			new LanguageModelPromptTsxPart(json),
		]);

		const references = getUniqueReferences(collected);

		let summary: string;
		if (references.length === 0) {
			summary = l10n.t`Searched ${scopeLabel} for "${query}", no results`;
		} else if (references.length === 1) {
			summary = l10n.t`Searched ${scopeLabel} for "${query}", 1 result`;
		} else {
			summary = l10n.t`Searched ${scopeLabel} for "${query}", ${references.length} results`;
		}
		result.toolResultMessage = new MarkdownString(summary);

		const anchors = references.map(r => r.anchor);
		result.toolResultDetails = anchors.filter(r => isUri(r) || isLocation(r));

		return result;
	}

	/**
	 * Validates the input before the tool runs and supplies the progress message.
	 *
	 * @throws If `scope` is missing or cannot be parsed.
	 */
	async prepareInvocation(options: vscode.LanguageModelToolInvocationPrepareOptions<GithubTextSearchToolParams>, _token: vscode.CancellationToken): Promise<vscode.PreparedToolInvocation> {
		const { scope, query } = options.input;

		if (!scope) {
			throw new Error(l10n.t`Invalid input. No 'scope' argument provided`);
		}

		if (!parseScope(scope)) {
			throw new Error(l10n.t`Invalid input. Could not parse 'scope' argument`);
		}

		return {
			invocationMessage: l10n.t("Searching '{0}' for '{1}'", scope, query),
		};
	}
}
/**
 * Turns the tool's `scope` argument into a code search scope.
 *
 * A value containing a slash is parsed as an `owner/repo` repository id; any
 * other non-empty value is treated as an organization name.
 *
 * @returns The parsed scope, or `undefined` when the value is empty or is not a valid repository id.
 */
function parseScope(scope: string): GithubCodeSearchScope | undefined {
	if (!scope) {
		return undefined;
	}

	const looksLikeRepo = scope.includes('/');
	if (!looksLikeRepo) {
		return { kind: 'org', org: scope };
	}

	const githubRepoId = GithubRepoId.parse(scope);
	return githubRepoId
		? { kind: 'repo', githubRepoId, localRepoRoot: undefined, indexedCommit: undefined }
		: undefined;
}
/**
* Props for the prompt element that renders GitHub text search hits.
*/
interface GithubTextSearchResultsProps extends BasePromptElementProps {
/** The search hits to render. Entries whose chunk text is empty are not rendered. */
readonly chunks: FileChunkAndScore[];
/** Output parameter: rendering appends one reference per distinct file to this array. */
readonly referencesOut: PromptReference[];
}
/**
 * Prompt element that renders each text search hit as a short header followed
 * by a fenced code block, and reports one reference per distinct file.
 */
class GithubTextSearchResults extends PromptElement<GithubTextSearchResultsProps> {
	/**
	 * Renders the hits that have text. As a side effect, the references that
	 * were emitted are appended to `props.referencesOut`.
	 */
	override render(_state: void, _sizing: PromptSizing): PromptPiece | undefined {
		const references: PromptReference[] = [];
		const seenFiles = new Set<string>();

		const renderedChunks = this.props.chunks
			.filter(x => x.chunk.text)
			.map(chunk => {
				// Emit a single reference per file even when the file has several matching excerpts.
				const fileKey = chunk.chunk.file.toString();
				if (!seenFiles.has(fileKey)) {
					seenFiles.add(fileKey);
					references.push(new PromptReference(chunk.chunk.file));
				}

				const githubInfo = parseGithubFileUrl(chunk.chunk.file);
				const displayPath = githubInfo?.path ?? chunk.chunk.file.toString();
				const nwoLabel = githubInfo?.nwo;

				// A start line of 0 means the line is unknown, so leave the
				// line out instead of telling the model the excerpt starts at line 0.
				const startLine = chunk.chunk.range.startLineNumber;
				const lineInfo = startLine > 0 ? ` starting at line ${startLine}` : '';

				const headerText = nwoLabel
					? `Text match excerpt from \`${nwoLabel}\` in \`${displayPath}\`${lineInfo}:`
					: `Text match excerpt in \`${displayPath}\`${lineInfo}:`;

				return <TextChunk>
					{headerText}<br />
					{createFencedCodeBlock(getLanguageId(chunk.chunk.file), chunk.chunk.text)}<br /><br />
				</TextChunk>;
			});

		this.props.referencesOut.push(...references);

		return <>
			<references value={references} />
			{renderedChunks}
		</>;
	}
}
/**
 * Splits a `https://github.com/owner/repo/tree/<branch>/<path>` URI into the
 * repository's `owner/repo` name and the file path that follows the branch segment.
 *
 * @returns The parts, or `undefined` when the URI does not match that layout.
 */
function parseGithubFileUrl(uri: URI): { nwo: string; path: string } | undefined {
	if (uri.scheme !== 'https' || uri.authority !== 'github.com') {
		return undefined;
	}

	// Expected segments: owner, repo, 'tree', branch, ...file path
	const segments = uri.path.split('/').filter(Boolean);
	if (segments.length < 4 || segments[2] !== 'tree') {
		return undefined;
	}

	const [owner, repo, , , ...filePath] = segments;
	return {
		nwo: `${owner}/${repo}`,
		path: filePath.join('/'),
	};
}
ToolRegistry.registerTool(GithubTextSearchTool);
@@ -133,6 +133,8 @@ export interface GitHubAPIRequestOptions {
version?: string;
type?: 'json' | 'text';
userAgent?: string;
accept?: string;
additionalHeaders?: { [key: string]: string };
returnStatusCodeOnError?: boolean;
silent404?: boolean;
callSite?: string;
@@ -147,9 +149,10 @@ export async function makeGitHubAPIRequest(
method: 'GET' | 'POST',
token: string | undefined,
options?: GitHubAPIRequestOptions) {
const { body, version, type = 'json', userAgent, returnStatusCodeOnError = false, silent404 = false, callSite = 'github-api-rest' } = options ?? {};
const { body, version, type = 'json', userAgent, accept, additionalHeaders, returnStatusCodeOnError = false, silent404 = false, callSite = 'github-api-rest' } = options ?? {};
const headers: { [key: string]: string } = {
'Accept': 'application/vnd.github+json',
'Accept': accept ?? 'application/vnd.github+json',
...additionalHeaders,
};
if (token) {
headers['Authorization'] = `Bearer ${token}`;
@@ -26,7 +26,7 @@ import { measureExecTime } from '../../log/common/logExecTime';
import { ILogService } from '../../log/common/logService';
import { getRequest, postRequest } from '../../networking/common/networking';
import { ITelemetryService } from '../../telemetry/common/telemetry';
import { CodeSearchOptions, CodeSearchResult, RemoteCodeSearchError, RemoteCodeSearchIndexState, RemoteCodeSearchIndexStatus } from './remoteCodeSearch';
import { CodeSearchOptions, RemoteCodeSearchError, RemoteCodeSearchIndexState, RemoteCodeSearchIndexStatus, SemanticCodeSearchResult } from './remoteCodeSearch';
interface ResponseShape {
@@ -100,7 +100,7 @@ export interface IAdoCodeSearchService {
options: CodeSearchOptions,
telemetryInfo: TelemetryCorrelationId,
token: CancellationToken,
): Promise<CodeSearchResult>;
): Promise<SemanticCodeSearchResult>;
}
/**
@@ -251,7 +251,7 @@ export class AdoCodeSearchService extends Disposable implements IAdoCodeSearchSe
options: CodeSearchOptions,
telemetryInfo: TelemetryCorrelationId,
token: CancellationToken
): Promise<CodeSearchResult> {
): Promise<SemanticCodeSearchResult> {
const totalSw = new StopWatch();
const authToken = await this.getAdoAuthToken(auth.silent);
@@ -13,19 +13,20 @@ import { URI } from '../../../util/vs/base/common/uri';
import { Range } from '../../../util/vs/editor/common/core/range';
import { createDecorator, IInstantiationService } from '../../../util/vs/platform/instantiation/common/instantiation';
import { IAuthenticationService } from '../../authentication/common/authentication';
import { FileChunkAndScore } from '../../chunking/common/chunk';
import { FileChunk, FileChunkAndScore } from '../../chunking/common/chunk';
import { stripChunkTextMetadata, truncateToMaxUtf8Length } from '../../chunking/common/chunkingStringUtils';
import { EmbeddingType } from '../../embeddings/common/embeddingsComputer';
import { ICAPIClientService } from '../../endpoint/common/capiClient';
import { IEnvService } from '../../env/common/envService';
import { GithubRepoId, toGithubNwo } from '../../git/common/gitService';
import { makeGitHubAPIRequest } from '../../github/common/githubAPI';
import { getGithubMetadataHeaders } from '../../github/common/githubApiFetcherService';
import { IIgnoreService } from '../../ignore/common/ignoreService';
import { ILogService } from '../../log/common/logService';
import { Response } from '../../networking/common/fetcherService';
import { IFetcherService, Response } from '../../networking/common/fetcherService';
import { postRequest } from '../../networking/common/networking';
import { ITelemetryService } from '../../telemetry/common/telemetry';
import { CodeSearchOptions, CodeSearchResult, RemoteCodeSearchError, RemoteCodeSearchIndexState, RemoteCodeSearchIndexStatus } from './remoteCodeSearch';
import { CodeSearchOptions, LexicalCodeSearchResult, RemoteCodeSearchError, RemoteCodeSearchIndexState, RemoteCodeSearchIndexStatus, SemanticCodeSearchResult } from './remoteCodeSearch';
interface ResponseShape {
@@ -46,6 +47,7 @@ type SemanticSearchResult = {
location: {
path: string; // file path
commit_sha: string;
ref_name: string;
repo: {
nwo: string;
url: string;
@@ -54,11 +56,19 @@ type SemanticSearchResult = {
};
/**
* Code search scope that targets a single GitHub repository.
*/
export interface GithubCodeSearchRepoInfo {
/** Discriminator marking this scope as a single repository. */
readonly kind: 'repo';
/** The repository to search. */
readonly githubRepoId: GithubRepoId;
/** Root of a local checkout, if any. When set, lexical search results are resolved to files under this root. */
readonly localRepoRoot: URI | undefined;
/** NOTE(review): presumably the commit the remote index was built from; confirm against callers. */
readonly indexedCommit: string | undefined;
}
/**
* Code search scope that targets every repository in a GitHub organization.
*/
export interface GithubCodeSearchOrgInfo {
/** Discriminator marking this scope as an organization. */
readonly kind: 'org';
/** The organization name, used as the `org:` qualifier of the search query. */
readonly org: string;
}
export type GithubCodeSearchScope = GithubCodeSearchRepoInfo | GithubCodeSearchOrgInfo;
export const IGithubCodeSearchService = createDecorator('IGithubCodeSearchService');
export interface IGithubCodeSearchService {
@@ -89,16 +99,29 @@ export interface IGithubCodeSearchService {
*
* The repo must have been indexed first. Make sure to check {@link getRemoteIndexState} or call {@link triggerIndexing}.
*/
searchRepo(
semanticSearch(
authOptions: { readonly silent: boolean },
embeddingType: EmbeddingType,
repo: GithubCodeSearchRepoInfo,
scope: GithubCodeSearchRepoInfo,
query: string,
maxResults: number,
options: CodeSearchOptions,
telemetryInfo: TelemetryCorrelationId,
token: CancellationToken,
): Promise<CodeSearchResult>;
): Promise<SemanticCodeSearchResult>;
/**
* Runs a lexical (keyword) search over a GitHub repo or org and returns the matching code snippets.
*
* @param authOptions Controls how the GitHub session is looked up; `silent` is forwarded to the session request.
* @param scope The repo or org the search is restricted to.
* @param query The keyword query.
* @param maxResults The maximum number of results to request.
* @param options Additional search options.
* @param telemetryInfo Caller and correlation id attached to the telemetry for this search.
* @param token Cancellation token for the request.
*/
lexicalSearch(
authOptions: { readonly silent: boolean },
scope: GithubCodeSearchScope,
query: string,
maxResults: number,
options: CodeSearchOptions,
telemetryInfo: TelemetryCorrelationId,
token: CancellationToken,
): Promise<LexicalCodeSearchResult>;
}
export class GithubCodeSearchService implements IGithubCodeSearchService {
@@ -109,6 +132,7 @@ export class GithubCodeSearchService implements IGithubCodeSearchService {
@IAuthenticationService private readonly _authenticationService: IAuthenticationService,
@ICAPIClientService private readonly _capiClientService: ICAPIClientService,
@IEnvService private readonly _envService: IEnvService,
@IFetcherService private readonly _fetcherService: IFetcherService,
@IIgnoreService private readonly _ignoreService: IIgnoreService,
@ILogService private readonly _logService: ILogService,
@ITelemetryService private readonly _telemetryService: ITelemetryService,
@@ -252,7 +276,7 @@ export class GithubCodeSearchService implements IGithubCodeSearchService {
return Result.ok(true);
}
async searchRepo(
async semanticSearch(
auth: { readonly silent: boolean },
embeddingType: EmbeddingType,
repo: GithubCodeSearchRepoInfo,
@@ -261,7 +285,7 @@ export class GithubCodeSearchService implements IGithubCodeSearchService {
options: CodeSearchOptions,
telemetryInfo: TelemetryCorrelationId,
token: CancellationToken
): Promise<CodeSearchResult> {
): Promise<SemanticCodeSearchResult> {
const authToken = await this.getGithubAccessToken(auth.silent);
if (!authToken) {
throw new Error('No valid auth token');
@@ -342,6 +366,80 @@ export class GithubCodeSearchService implements IGithubCodeSearchService {
return result;
}
/**
 * Runs a keyword search through GitHub's `search/code` REST endpoint, restricted
 * to the given repo or org, and converts the response into file chunks.
 *
 * @param auth Controls how the GitHub session is looked up.
 * @param scope The repo or org to restrict the search to.
 * @param query The keyword query; the scope qualifier is appended to it.
 * @param maxResults Requested page size. Clamped to an integer in [1, 100] before being sent.
 * @param options Search options forwarded to the response parser.
 * @param telemetryInfo Caller and correlation id reported with the telemetry events.
 * @param token Cancellation token; cancelling rejects the pending request and parsing.
 * @throws If no auth token is available, the request yields no body, or the body has no `items` array.
 */
async lexicalSearch(
	auth: { readonly silent: boolean },
	scope: GithubCodeSearchScope,
	query: string,
	maxResults: number,
	options: CodeSearchOptions,
	telemetryInfo: TelemetryCorrelationId,
	token: CancellationToken
): Promise<LexicalCodeSearchResult> {
	const authToken = await this.getGithubAccessToken(auth.silent);
	if (!authToken) {
		throw new Error('No valid auth token');
	}

	const scopeQualifier = scope.kind === 'org' ? `org:${scope.org}` : `repo:${toGithubNwo(scope.githubRepoId)}`;
	const searchQuery = `${query} ${scopeQualifier}`;

	// `per_page` must be an integer between 1 and 100. The value originates from
	// tool input, so normalize it instead of sending an invalid page size.
	const perPage = Number.isFinite(maxResults)
		? Math.min(100, Math.max(1, Math.floor(maxResults)))
		: 100;
	const routeSlug = `search/code?q=${encodeURIComponent(searchQuery)}&per_page=${perPage}`;

	const body = await raceCancellationError(makeGitHubAPIRequest(
		this._fetcherService,
		this._logService,
		this._telemetryService,
		this._capiClientService.dotcomAPIURL,
		routeSlug,
		'GET',
		authToken,
		{
			// Ask for text-match metadata so each item carries the matching fragments.
			accept: 'application/vnd.github.text-match+json',
			additionalHeaders: getGithubMetadataHeaders(telemetryInfo.callTracker, this._envService),
			callSite: 'github-code-search-lexical',
		},
	), token);

	if (!body) {
		/* __GDPR__
			"githubCodeSearch.lexicalSearch.error" : {
				"owner": "mjbvz",
				"comment": "Information about failed lexical code searches",
				"workspaceSearchSource": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Caller of the search" },
				"workspaceSearchCorrelationId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Correlation id for the search" }
			}
		*/
		this._telemetryService.sendMSFTTelemetryEvent('githubCodeSearch.lexicalSearch.error', {
			workspaceSearchSource: telemetryInfo.callTracker.toString(),
			workspaceSearchCorrelationId: telemetryInfo.correlationId,
		});
		throw new Error(`Code search lexical search failed`);
	}

	if (!Array.isArray(body.items)) {
		throw new Error(`Code search lexical search unexpected response json shape`);
	}

	const result = await raceCancellationError(parseLexicalSearchResponse(body, scope, options, this._ignoreService), token);

	/* __GDPR__
		"githubCodeSearch.lexicalSearch.success" : {
			"owner": "mjbvz",
			"comment": "Information about successful lexical code searches",
			"workspaceSearchSource": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Caller of the search" },
			"workspaceSearchCorrelationId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Correlation id for the search" },
			"resultCount": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Total number of returned items from the search" }
		}
	*/
	this._telemetryService.sendMSFTTelemetryEvent('githubCodeSearch.lexicalSearch.success', {
		workspaceSearchSource: telemetryInfo.callTracker.toString(),
		workspaceSearchCorrelationId: telemetryInfo.correlationId,
	}, {
		resultCount: body.items.length,
	});

	return result;
}
private async getGithubAccessToken(silent: boolean) {
return (await this._authenticationService.getGitHubSession('permissive', { silent }))?.accessToken
?? (await this._authenticationService.getGitHubSession('any', { silent }))?.accessToken;
@@ -370,7 +468,7 @@ export class GithubCodeSearchService implements IGithubCodeSearchService {
}
}
export async function parseGithubCodeSearchResponse(body: ResponseShape, repo: GithubCodeSearchRepoInfo, options: CodeSearchOptions & { skipVerifyRepo?: boolean }, ignoreService: IIgnoreService): Promise<CodeSearchResult> {
export async function parseGithubCodeSearchResponse(body: ResponseShape, repo: GithubCodeSearchRepoInfo, options: CodeSearchOptions & { skipVerifyRepo?: boolean }, ignoreService: IIgnoreService): Promise<SemanticCodeSearchResult> {
let outOfSync = false;
const outChunks: FileChunkAndScore[] = [];
@@ -415,5 +513,106 @@ export async function parseGithubCodeSearchResponse(body: ResponseShape, repo: G
});
}));
return { chunks: outChunks, outOfSync };
// Extract the remote URL and ref name from the first result
const firstResult = body.results[0];
let remoteUrl: string | undefined;
let refName: string | undefined;
if (firstResult) {
// Derive the web URL from the API URL (e.g. https://api.github.com/repos/o/r -> https://github.com/o/r)
const apiUrl = firstResult.location.repo.url;
const nwo = firstResult.location.repo.nwo;
try {
const parsed = URI.parse(apiUrl);
const host = parsed.authority === 'api.github.com' ? 'github.com' : parsed.authority.replace(/^api\./, '');
remoteUrl = `https://${host}/${nwo}`;
} catch {
// Fall back to constructing from nwo
remoteUrl = `https://github.com/${nwo}`;
}
// Extract branch name from ref_name (e.g. "refs/heads/main" -> "main")
const rawRef = firstResult.location.ref_name;
if (rawRef?.startsWith('refs/heads/')) {
refName = rawRef.slice('refs/heads/'.length);
} else if (rawRef) {
refName = rawRef;
}
}
return { chunks: outChunks, outOfSync, remoteUrl, refName };
}
/**
* JSON body expected back from the lexical code search request.
*/
interface LexicalSearchResponseShape {
// NOTE(review): presumably the total number of matches on the server, not just in this page — confirm against the API docs.
readonly total_count: number;
// NOTE(review): presumably set when the server could not finish the search — confirm against the API docs.
readonly incomplete_results: boolean;
/** The matching files; each one is converted into one or more chunks. */
readonly items: readonly LexicalSearchItem[];
}
/**
* A single matching file in a lexical code search response.
*/
type LexicalSearchItem = {
/** Path of the file, relative to the repository root. */
readonly path: string;
readonly repository: {
/** Repository name in `owner/repo` form; compared case-insensitively against the requested scope. */
readonly full_name: string;
};
/** Text-match metadata. Only entries whose `property` is `content` are turned into chunks. */
readonly text_matches?: readonly {
/** The excerpt containing the match; becomes the chunk text. */
readonly fragment: string;
// NOTE(review): presumably the matched terms with their offsets inside `fragment` — not read by the parser.
readonly matches: readonly { readonly text: string; readonly indices: readonly [number, number] }[];
readonly object_type: string;
/** Which part of the item matched, e.g. `content`. */
readonly property: string;
}[];
// NOTE(review): relevance score reported by the server — not read by the parser.
readonly score: number;
};
/**
 * Converts a lexical code search response into file chunks.
 *
 * Items outside the requested repo or org are dropped unless `options.skipVerifyRepo`
 * is set. For repo scopes with a local root, files are resolved against that root and
 * ignored files are skipped; otherwise files get a `githubRepoResult` URI whose path is
 * `/owner/repo/<path>`. Each `content` text match becomes one chunk. An item with no
 * such match yields a single empty whole-file chunk.
 *
 * The returned chunks follow the order of `body.items`, even though the items are
 * processed concurrently.
 *
 * @param body The parsed response body.
 * @param scope The scope that was searched.
 * @param options Search options, including the glob patterns used to filter files.
 * @param ignoreService Used to skip ignored files when a local repo root is known.
 */
export async function parseLexicalSearchResponse(body: LexicalSearchResponseShape, scope: GithubCodeSearchScope & { skipVerifyRepo?: boolean }, options: CodeSearchOptions & { skipVerifyRepo?: boolean }, ignoreService: IIgnoreService): Promise<LexicalCodeSearchResult> {
	const localRepoRoot = scope.kind === 'repo' ? scope.localRepoRoot : undefined;

	// Each item resolves to its own chunk list so the final order does not depend
	// on which ignore check happens to finish first.
	const chunksPerItem = await Promise.all(body.items.map(async (item): Promise<FileChunk[]> => {
		if (!options.skipVerifyRepo && scope.kind === 'repo' && item.repository.full_name.toLowerCase() !== toGithubNwo(scope.githubRepoId)) {
			return [];
		}

		if (!options.skipVerifyRepo && scope.kind === 'org' && item.repository.full_name.toLowerCase().split('/')[0] !== scope.org.toLowerCase()) {
			return [];
		}

		let fileUri: URI;
		if (localRepoRoot) {
			fileUri = URI.joinPath(localRepoRoot, item.path);
			if (await ignoreService.isCopilotIgnored(fileUri)) {
				return [];
			}
		} else {
			fileUri = URI.from({
				scheme: 'githubRepoResult',
				path: '/' + item.repository.full_name + '/' + item.path
			});
		}

		if (!shouldInclude(fileUri, options.globPatterns)) {
			return [];
		}

		const textMatches = item.text_matches?.filter(m => m.property === 'content');
		if (textMatches && textMatches.length > 0) {
			// The response carries no line numbers, so the range is left at zero.
			return textMatches.map((match): FileChunk => ({
				file: fileUri,
				text: match.fragment,
				rawText: undefined,
				range: new Range(0, 0, 0, 0),
				isFullFile: false,
			}));
		}

		// No text matches, include the file as a whole-file result
		return [{
			file: fileUri,
			text: '',
			rawText: undefined,
			range: new Range(0, 0, 0, 0),
			isFullFile: true,
		}];
	}));

	return { chunks: chunksPerItem.flat(), outOfSync: false };
}
@@ -3,7 +3,7 @@
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import { GlobIncludeOptions } from '../../../util/common/glob';
import { FileChunkAndScore } from '../../chunking/common/chunk';
import { FileChunk, FileChunkAndScore } from '../../chunking/common/chunk';
export enum RemoteCodeSearchIndexStatus {
/** The repo index is built and ready to use */
@@ -29,11 +29,23 @@ export type RemoteCodeSearchError =
| { readonly type: 'generic-error'; readonly error: Error }
;
export interface CodeSearchResult {
readonly chunks: readonly FileChunkAndScore[];
/**
* Fields shared by the semantic and lexical code search result types.
*/
interface BaseCodeSearchResult {
/** Tracks if the commit sha code search used differs from the one we used to compute the local diff */
readonly outOfSync: boolean;
/** The web URL of the remote repo, e.g. `https://github.com/microsoft/vscode` */
readonly remoteUrl?: string;
/** The branch name the results are from, e.g. `main` */
readonly refName?: string;
}
/**
* Result of a semantic code search.
*/
export interface SemanticCodeSearchResult extends BaseCodeSearchResult {
/** The matching chunks, each paired with its score. */
readonly chunks: readonly FileChunkAndScore[];
}
/**
* Result of a lexical (keyword) code search.
*/
export interface LexicalCodeSearchResult extends BaseCodeSearchResult {
/** The matching chunks. Unlike semantic results, these carry no score. */
readonly chunks: readonly FileChunk[];
}
export interface CodeSearchOptions {
@@ -33,7 +33,7 @@ import { Change } from '../../../git/vscode/git';
import { logExecTime, LogExecTime } from '../../../log/common/logExecTime';
import { ILogService } from '../../../log/common/logService';
import { IAdoCodeSearchService } from '../../../remoteCodeSearch/common/adoCodeSearchService';
import { CodeSearchResult } from '../../../remoteCodeSearch/common/remoteCodeSearch';
import { SemanticCodeSearchResult } from '../../../remoteCodeSearch/common/remoteCodeSearch';
import { ICodeSearchAuthenticationService } from '../../../remoteCodeSearch/node/codeSearchRepoAuth';
import { isGitHubRemoteRepository } from '../../../remoteRepositories/common/utils';
import { IExperimentationService } from '../../../telemetry/common/nullExperimentationService';
@@ -555,13 +555,13 @@ export class CodeSearchChunkSearch extends Disposable {
localSearchCts.cancel();
throw e;
})
: Promise.resolve({ chunks: [], outOfSync: false });
: Promise.resolve<SemanticCodeSearchResult>({ chunks: [], outOfSync: false });
const localSearchOperation = raceTimeout(this.searchLocalDiff(diffArray, sizing, query, options, innerTelemetryInfo, localSearchCts.token), this.localDiffSearchTimeout, () => {
localSearchCts.cancel();
});
let codeSearchResults: CodeSearchResult | undefined;
let codeSearchResults: SemanticCodeSearchResult | undefined;
let localResults: DiffSearchResult | undefined;
try {
codeSearchResults = await raceCancellationError(codeSearchOperation, token);
@@ -720,7 +720,7 @@ export class CodeSearchChunkSearch extends Disposable {
*/
this._telemetryService.sendMSFTTelemetryEvent('codeSearchChunkSearch.perf.doCodeSearchWithRetry', { status }, { execTime });
})
private async doCodeSearch(query: WorkspaceChunkQueryWithEmbeddings, repos: readonly CodeSearchRepo[], sizing: StrategySearchSizing, options: WorkspaceChunkSearchOptions, telemetryInfo: TelemetryCorrelationId, token: CancellationToken): Promise<CodeSearchResult | undefined> {
private async doCodeSearch(query: WorkspaceChunkQueryWithEmbeddings, repos: readonly CodeSearchRepo[], sizing: StrategySearchSizing, options: WorkspaceChunkSearchOptions, telemetryInfo: TelemetryCorrelationId, token: CancellationToken): Promise<SemanticCodeSearchResult | undefined> {
const results = await Promise.all(repos.map(repo => {
return repo.searchRepo({ silent: true }, this._embeddingType, query.queryText, sizing.maxResultCountHint, options, telemetryInfo, token);
}));
@@ -16,7 +16,7 @@ import { measureExecTime } from '../../../log/common/logExecTime';
import { ILogService } from '../../../log/common/logService';
import { IAdoCodeSearchService } from '../../../remoteCodeSearch/common/adoCodeSearchService';
import { IGithubCodeSearchService } from '../../../remoteCodeSearch/common/githubCodeSearchService';
import { CodeSearchResult, RemoteCodeSearchError, RemoteCodeSearchIndexState, RemoteCodeSearchIndexStatus } from '../../../remoteCodeSearch/common/remoteCodeSearch';
import { RemoteCodeSearchError, RemoteCodeSearchIndexState, RemoteCodeSearchIndexStatus, SemanticCodeSearchResult } from '../../../remoteCodeSearch/common/remoteCodeSearch';
import { ITelemetryService } from '../../../telemetry/common/telemetry';
import { WorkspaceChunkSearchOptions } from '../../common/workspaceChunkSearch';
import { RepoInfo } from './repoTracker';
@@ -149,7 +149,7 @@ export interface CodeSearchRepo extends IDisposable {
options: WorkspaceChunkSearchOptions,
telemetryInfo: TelemetryCorrelationId,
token: CancellationToken
): Promise<CodeSearchResult>;
): Promise<SemanticCodeSearchResult>;
triggerRemoteIndexingOfRepo(triggerReason: BuildIndexTriggerReason, telemetryInfo: TelemetryCorrelationId): Promise<Result<true, TriggerIndexingError>>;
@@ -232,7 +232,7 @@ abstract class BaseRemoteCodeSearchRepo extends Disposable implements CodeSearch
this._onDidChangeStatus.fire(this._state.status);
}
public abstract searchRepo(authOptions: { silent: boolean }, embeddingType: EmbeddingType, resolvedQuery: string, maxResultCountHint: number, options: WorkspaceChunkSearchOptions, telemetryInfo: TelemetryCorrelationId, token: CancellationToken): Promise<CodeSearchResult>;
/**
 * Searches this remote repo and resolves with a {@link SemanticCodeSearchResult}.
 *
 * Concrete subclasses supply the implementation: `GithubCodeSearchRepo` forwards to
 * its GitHub code search service's `semanticSearch`, and `AdoCodeSearchRepo` forwards
 * to its ADO code search service's `searchRepo`.
 *
 * @param authOptions Authentication options; `silent` presumably suppresses interactive auth prompts (TODO confirm).
 * @param embeddingType Embedding type to search with.
 * @param resolvedQuery Query text to search for (callers pass `query.queryText`).
 * @param maxResultCountHint Hint for how many results to return (callers pass `sizing.maxResultCountHint`).
 * @param options Workspace chunk search options.
 * @param telemetryInfo Telemetry correlation id for this request.
 * @param token Cancellation token for the request.
 */
public abstract searchRepo(authOptions: { silent: boolean }, embeddingType: EmbeddingType, resolvedQuery: string, maxResultCountHint: number, options: WorkspaceChunkSearchOptions, telemetryInfo: TelemetryCorrelationId, token: CancellationToken): Promise<SemanticCodeSearchResult>;
public abstract triggerRemoteIndexingOfRepo(triggerReason: BuildIndexTriggerReason, telemetryInfo: TelemetryCorrelationId): Promise<Result<true, TriggerIndexingError>>;
public abstract prepareSearch(telemetryInfo: TelemetryCorrelationId, token: CancellationToken): Promise<boolean>;
@@ -381,8 +381,9 @@ export class GithubCodeSearchRepo extends BaseRemoteCodeSearchRepo {
super(repoInfo, remoteInfo, logService, telemetryService);
}
public override async searchRepo(authOptions: { silent: boolean }, embeddingType: EmbeddingType, resolvedQuery: string, maxResultCountHint: number, options: WorkspaceChunkSearchOptions, telemetryInfo: TelemetryCorrelationId, token: CancellationToken): Promise<CodeSearchResult> {
const result = await this._githubCodeSearchService.searchRepo(authOptions, embeddingType, {
public override async searchRepo(authOptions: { silent: boolean }, embeddingType: EmbeddingType, resolvedQuery: string, maxResultCountHint: number, options: WorkspaceChunkSearchOptions, telemetryInfo: TelemetryCorrelationId, token: CancellationToken): Promise<SemanticCodeSearchResult> {
const result = await this._githubCodeSearchService.semanticSearch(authOptions, embeddingType, {
kind: 'repo',
githubRepoId: this._githubRepoId,
localRepoRoot: this.repoInfo.rootUri,
indexedCommit: undefined, // TODO
@@ -502,7 +503,7 @@ export class AdoCodeSearchRepo extends BaseRemoteCodeSearchRepo {
super(repoInfo, remoteInfo, logService, telemetryService);
}
public searchRepo(authOptions: { silent: boolean }, _embeddingType: EmbeddingType, resolvedQuery: string, maxResultCountHint: number, options: WorkspaceChunkSearchOptions, telemetryInfo: TelemetryCorrelationId, token: CancellationToken): Promise<CodeSearchResult> {
public searchRepo(authOptions: { silent: boolean }, _embeddingType: EmbeddingType, resolvedQuery: string, maxResultCountHint: number, options: WorkspaceChunkSearchOptions, telemetryInfo: TelemetryCorrelationId, token: CancellationToken): Promise<SemanticCodeSearchResult> {
return this._adoCodeSearchService.searchRepo(authOptions, {
adoRepoId: this._adoRepoId,
localRepoRoot: this.repoInfo.rootUri,
@@ -7,8 +7,8 @@ import { EmbeddingType } from '../../src/platform/embeddings/common/embeddingsCo
import { GithubRepoId } from '../../src/platform/git/common/gitService';
import { IIgnoreService } from '../../src/platform/ignore/common/ignoreService';
import { ILogService } from '../../src/platform/log/common/logService';
import { GithubCodeSearchRepoInfo, IGithubCodeSearchService, parseGithubCodeSearchResponse } from '../../src/platform/remoteCodeSearch/common/githubCodeSearchService';
import { CodeSearchResult, RemoteCodeSearchError, RemoteCodeSearchIndexState, RemoteCodeSearchIndexStatus } from '../../src/platform/remoteCodeSearch/common/remoteCodeSearch';
import { GithubCodeSearchScope, IGithubCodeSearchService, parseGithubCodeSearchResponse } from '../../src/platform/remoteCodeSearch/common/githubCodeSearchService';
import { LexicalCodeSearchResult, RemoteCodeSearchError, RemoteCodeSearchIndexState, RemoteCodeSearchIndexStatus, SemanticCodeSearchResult } from '../../src/platform/remoteCodeSearch/common/remoteCodeSearch';
import { WorkspaceChunkQuery, WorkspaceChunkSearchOptions } from '../../src/platform/workspaceChunkSearch/common/workspaceChunkSearch';
import { BuildIndexTriggerReason, TriggerIndexingError } from '../../src/platform/workspaceChunkSearch/node/codeSearch/codeSearchRepo';
import { IWorkspaceChunkSearchService, WorkspaceChunkSearchResult, WorkspaceChunkSearchSizing, WorkspaceIndexState } from '../../src/platform/workspaceChunkSearch/node/workspaceChunkSearchService';
@@ -34,7 +34,11 @@ class SimulationGithubCodeSearchService extends Disposable implements IGithubCod
super();
}
async searchRepo(authOptions: { silent: boolean }, embeddingType: EmbeddingType, repo: GithubCodeSearchRepoInfo, query: string, maxResults: number, options: WorkspaceChunkSearchOptions, _telemetryInfo: TelemetryCorrelationId, token: CancellationToken): Promise<CodeSearchResult> {
/**
 * Lexical search stub for the simulation service.
 *
 * Every argument is ignored; the returned promise always rejects.
 *
 * @throws Error with the message 'Method not implemented.'
 */
async lexicalSearch(
	_authOptions: { silent: boolean },
	_scope: GithubCodeSearchScope,
	_query: string,
	_maxResults: number,
	_options: WorkspaceChunkSearchOptions,
	_telemetryInfo: TelemetryCorrelationId,
	_token: CancellationToken,
): Promise<LexicalCodeSearchResult> {
	throw new Error('Method not implemented.');
}
async semanticSearch(authOptions: { silent: boolean }, embeddingType: EmbeddingType, repo: GithubCodeSearchScope & { kind: 'repo' }, query: string, maxResults: number, options: WorkspaceChunkSearchOptions, _telemetryInfo: TelemetryCorrelationId, token: CancellationToken): Promise<SemanticCodeSearchResult> {
this._logService.trace(`SimulationGithubCodeSearchService::searchRepo(${repo.githubRepoId}, ${query})`);
const response = await fetch(searchEndpoint, {
method: 'POST',
@@ -97,7 +101,8 @@ export class SimulationCodeSearchChunkSearchService extends Disposable implement
async searchFileChunks(sizing: WorkspaceChunkSearchSizing, query: WorkspaceChunkQuery, options: WorkspaceChunkSearchOptions, telemetryInfo: TelemetryCorrelationId, progress: Progress<ChatResponsePart> | undefined, token: CancellationToken): Promise<WorkspaceChunkSearchResult> {
const repo = new GithubRepoId('test-org', 'test-repo');
try {
const results = await this._githubCodeSearchService.searchRepo({ silent: true }, EmbeddingType.text3small_512, {
const results = await this._githubCodeSearchService.semanticSearch({ silent: true }, EmbeddingType.text3small_512, {
kind: 'repo',
githubRepoId: repo,
indexedCommit: undefined,
localRepoRoot: undefined,