mirror of
https://github.com/microsoft/vscode.git
synced 2026-05-25 17:57:50 +01:00
Add basic github lexical search
Fixes #312210 This is using the old search endpoint. We'll start switching over the new one once it's ready Co-authored-by: Copilot <copilot@github.com>
This commit is contained in:
+209
-10
@@ -13,19 +13,20 @@ import { URI } from '../../../util/vs/base/common/uri';
|
||||
import { Range } from '../../../util/vs/editor/common/core/range';
|
||||
import { createDecorator, IInstantiationService } from '../../../util/vs/platform/instantiation/common/instantiation';
|
||||
import { IAuthenticationService } from '../../authentication/common/authentication';
|
||||
import { FileChunkAndScore } from '../../chunking/common/chunk';
|
||||
import { FileChunk, FileChunkAndScore } from '../../chunking/common/chunk';
|
||||
import { stripChunkTextMetadata, truncateToMaxUtf8Length } from '../../chunking/common/chunkingStringUtils';
|
||||
import { EmbeddingType } from '../../embeddings/common/embeddingsComputer';
|
||||
import { ICAPIClientService } from '../../endpoint/common/capiClient';
|
||||
import { IEnvService } from '../../env/common/envService';
|
||||
import { GithubRepoId, toGithubNwo } from '../../git/common/gitService';
|
||||
import { makeGitHubAPIRequest } from '../../github/common/githubAPI';
|
||||
import { getGithubMetadataHeaders } from '../../github/common/githubApiFetcherService';
|
||||
import { IIgnoreService } from '../../ignore/common/ignoreService';
|
||||
import { ILogService } from '../../log/common/logService';
|
||||
import { Response } from '../../networking/common/fetcherService';
|
||||
import { IFetcherService, Response } from '../../networking/common/fetcherService';
|
||||
import { postRequest } from '../../networking/common/networking';
|
||||
import { ITelemetryService } from '../../telemetry/common/telemetry';
|
||||
import { CodeSearchOptions, CodeSearchResult, RemoteCodeSearchError, RemoteCodeSearchIndexState, RemoteCodeSearchIndexStatus } from './remoteCodeSearch';
|
||||
import { CodeSearchOptions, LexicalCodeSearchResult, RemoteCodeSearchError, RemoteCodeSearchIndexState, RemoteCodeSearchIndexStatus, SemanticCodeSearchResult } from './remoteCodeSearch';
|
||||
|
||||
|
||||
interface ResponseShape {
|
||||
@@ -46,6 +47,7 @@ type SemanticSearchResult = {
|
||||
location: {
|
||||
path: string; // file path
|
||||
commit_sha: string;
|
||||
ref_name: string;
|
||||
repo: {
|
||||
nwo: string;
|
||||
url: string;
|
||||
@@ -54,11 +56,19 @@ type SemanticSearchResult = {
|
||||
};
|
||||
|
||||
export interface GithubCodeSearchRepoInfo {
|
||||
readonly kind: 'repo';
|
||||
readonly githubRepoId: GithubRepoId;
|
||||
readonly localRepoRoot: URI | undefined;
|
||||
readonly indexedCommit: string | undefined;
|
||||
}
|
||||
|
||||
export interface GithubCodeSearchOrgInfo {
|
||||
readonly kind: 'org';
|
||||
readonly org: string;
|
||||
}
|
||||
|
||||
export type GithubCodeSearchScope = GithubCodeSearchRepoInfo | GithubCodeSearchOrgInfo;
|
||||
|
||||
export const IGithubCodeSearchService = createDecorator('IGithubCodeSearchService');
|
||||
|
||||
export interface IGithubCodeSearchService {
|
||||
@@ -89,16 +99,29 @@ export interface IGithubCodeSearchService {
|
||||
*
|
||||
* The repo must have been indexed first. Make sure to check {@link getRemoteIndexState} or call {@link triggerIndexing}.
|
||||
*/
|
||||
searchRepo(
|
||||
semanticSearch(
|
||||
authOptions: { readonly silent: boolean },
|
||||
embeddingType: EmbeddingType,
|
||||
repo: GithubCodeSearchRepoInfo,
|
||||
scope: GithubCodeSearchRepoInfo,
|
||||
query: string,
|
||||
maxResults: number,
|
||||
options: CodeSearchOptions,
|
||||
telemetryInfo: TelemetryCorrelationId,
|
||||
token: CancellationToken,
|
||||
): Promise<CodeSearchResult>;
|
||||
): Promise<SemanticCodeSearchResult>;
|
||||
|
||||
/**
|
||||
* Lexical searches a given github repo or org for relevant code snippets
|
||||
*/
|
||||
lexicalSearch(
|
||||
authOptions: { readonly silent: boolean },
|
||||
scope: GithubCodeSearchScope,
|
||||
query: string,
|
||||
maxResults: number,
|
||||
options: CodeSearchOptions,
|
||||
telemetryInfo: TelemetryCorrelationId,
|
||||
token: CancellationToken,
|
||||
): Promise<LexicalCodeSearchResult>;
|
||||
}
|
||||
|
||||
export class GithubCodeSearchService implements IGithubCodeSearchService {
|
||||
@@ -109,6 +132,7 @@ export class GithubCodeSearchService implements IGithubCodeSearchService {
|
||||
@IAuthenticationService private readonly _authenticationService: IAuthenticationService,
|
||||
@ICAPIClientService private readonly _capiClientService: ICAPIClientService,
|
||||
@IEnvService private readonly _envService: IEnvService,
|
||||
@IFetcherService private readonly _fetcherService: IFetcherService,
|
||||
@IIgnoreService private readonly _ignoreService: IIgnoreService,
|
||||
@ILogService private readonly _logService: ILogService,
|
||||
@ITelemetryService private readonly _telemetryService: ITelemetryService,
|
||||
@@ -252,7 +276,7 @@ export class GithubCodeSearchService implements IGithubCodeSearchService {
|
||||
return Result.ok(true);
|
||||
}
|
||||
|
||||
async searchRepo(
|
||||
async semanticSearch(
|
||||
auth: { readonly silent: boolean },
|
||||
embeddingType: EmbeddingType,
|
||||
repo: GithubCodeSearchRepoInfo,
|
||||
@@ -261,7 +285,7 @@ export class GithubCodeSearchService implements IGithubCodeSearchService {
|
||||
options: CodeSearchOptions,
|
||||
telemetryInfo: TelemetryCorrelationId,
|
||||
token: CancellationToken
|
||||
): Promise<CodeSearchResult> {
|
||||
): Promise<SemanticCodeSearchResult> {
|
||||
const authToken = await this.getGithubAccessToken(auth.silent);
|
||||
if (!authToken) {
|
||||
throw new Error('No valid auth token');
|
||||
@@ -342,6 +366,80 @@ export class GithubCodeSearchService implements IGithubCodeSearchService {
|
||||
return result;
|
||||
}
|
||||
|
||||
async lexicalSearch(
|
||||
auth: { readonly silent: boolean },
|
||||
scope: GithubCodeSearchScope,
|
||||
query: string,
|
||||
maxResults: number,
|
||||
options: CodeSearchOptions,
|
||||
telemetryInfo: TelemetryCorrelationId,
|
||||
token: CancellationToken
|
||||
): Promise<LexicalCodeSearchResult> {
|
||||
const authToken = await this.getGithubAccessToken(auth.silent);
|
||||
if (!authToken) {
|
||||
throw new Error('No valid auth token');
|
||||
}
|
||||
|
||||
const scopeQualifier = scope.kind === 'org' ? `org:${scope.org}` : `repo:${toGithubNwo(scope.githubRepoId)}`;
|
||||
const searchQuery = `${query} ${scopeQualifier}`;
|
||||
const routeSlug = `search/code?q=${encodeURIComponent(searchQuery)}&per_page=${maxResults}`;
|
||||
|
||||
const body = await raceCancellationError(makeGitHubAPIRequest(
|
||||
this._fetcherService,
|
||||
this._logService,
|
||||
this._telemetryService,
|
||||
this._capiClientService.dotcomAPIURL,
|
||||
routeSlug,
|
||||
'GET',
|
||||
authToken,
|
||||
{
|
||||
accept: 'application/vnd.github.text-match+json',
|
||||
additionalHeaders: getGithubMetadataHeaders(telemetryInfo.callTracker, this._envService),
|
||||
callSite: 'github-code-search-lexical',
|
||||
},
|
||||
), token);
|
||||
|
||||
if (!body) {
|
||||
/* __GDPR__
|
||||
"githubCodeSearch.lexicalSearch.error" : {
|
||||
"owner": "mjbvz",
|
||||
"comment": "Information about failed lexical code searches",
|
||||
"workspaceSearchSource": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Caller of the search" },
|
||||
"workspaceSearchCorrelationId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Correlation id for the search" }
|
||||
}
|
||||
*/
|
||||
this._telemetryService.sendMSFTTelemetryEvent('githubCodeSearch.lexicalSearch.error', {
|
||||
workspaceSearchSource: telemetryInfo.callTracker.toString(),
|
||||
workspaceSearchCorrelationId: telemetryInfo.correlationId,
|
||||
});
|
||||
|
||||
throw new Error(`Code search lexical search failed`);
|
||||
}
|
||||
if (!Array.isArray(body.items)) {
|
||||
throw new Error(`Code search lexical search unexpected response json shape`);
|
||||
}
|
||||
|
||||
const result = await raceCancellationError(parseLexicalSearchResponse(body, scope, options, this._ignoreService), token);
|
||||
|
||||
/* __GDPR__
|
||||
"githubCodeSearch.lexicalSearch.success" : {
|
||||
"owner": "mjbvz",
|
||||
"comment": "Information about successful lexical code searches",
|
||||
"workspaceSearchSource": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Caller of the search" },
|
||||
"workspaceSearchCorrelationId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Correlation id for the search" },
|
||||
"resultCount": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "Total number of returned items from the search" }
|
||||
}
|
||||
*/
|
||||
this._telemetryService.sendMSFTTelemetryEvent('githubCodeSearch.lexicalSearch.success', {
|
||||
workspaceSearchSource: telemetryInfo.callTracker.toString(),
|
||||
workspaceSearchCorrelationId: telemetryInfo.correlationId,
|
||||
}, {
|
||||
resultCount: body.items.length,
|
||||
});
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
private async getGithubAccessToken(silent: boolean) {
|
||||
return (await this._authenticationService.getGitHubSession('permissive', { silent }))?.accessToken
|
||||
?? (await this._authenticationService.getGitHubSession('any', { silent }))?.accessToken;
|
||||
@@ -370,7 +468,7 @@ export class GithubCodeSearchService implements IGithubCodeSearchService {
|
||||
}
|
||||
}
|
||||
|
||||
export async function parseGithubCodeSearchResponse(body: ResponseShape, repo: GithubCodeSearchRepoInfo, options: CodeSearchOptions & { skipVerifyRepo?: boolean }, ignoreService: IIgnoreService): Promise<CodeSearchResult> {
|
||||
export async function parseGithubCodeSearchResponse(body: ResponseShape, repo: GithubCodeSearchRepoInfo, options: CodeSearchOptions & { skipVerifyRepo?: boolean }, ignoreService: IIgnoreService): Promise<SemanticCodeSearchResult> {
|
||||
let outOfSync = false;
|
||||
const outChunks: FileChunkAndScore[] = [];
|
||||
|
||||
@@ -415,5 +513,106 @@ export async function parseGithubCodeSearchResponse(body: ResponseShape, repo: G
|
||||
});
|
||||
}));
|
||||
|
||||
return { chunks: outChunks, outOfSync };
|
||||
// Extract the remote URL and ref name from the first result
|
||||
const firstResult = body.results[0];
|
||||
let remoteUrl: string | undefined;
|
||||
let refName: string | undefined;
|
||||
if (firstResult) {
|
||||
// Derive the web URL from the API URL (e.g. https://api.github.com/repos/o/r -> https://github.com/o/r)
|
||||
const apiUrl = firstResult.location.repo.url;
|
||||
const nwo = firstResult.location.repo.nwo;
|
||||
try {
|
||||
const parsed = URI.parse(apiUrl);
|
||||
const host = parsed.authority === 'api.github.com' ? 'github.com' : parsed.authority.replace(/^api\./, '');
|
||||
remoteUrl = `https://${host}/${nwo}`;
|
||||
} catch {
|
||||
// Fall back to constructing from nwo
|
||||
remoteUrl = `https://github.com/${nwo}`;
|
||||
}
|
||||
|
||||
// Extract branch name from ref_name (e.g. "refs/heads/main" -> "main")
|
||||
const rawRef = firstResult.location.ref_name;
|
||||
if (rawRef?.startsWith('refs/heads/')) {
|
||||
refName = rawRef.slice('refs/heads/'.length);
|
||||
} else if (rawRef) {
|
||||
refName = rawRef;
|
||||
}
|
||||
}
|
||||
|
||||
return { chunks: outChunks, outOfSync, remoteUrl, refName };
|
||||
}
|
||||
|
||||
interface LexicalSearchResponseShape {
|
||||
readonly total_count: number;
|
||||
readonly incomplete_results: boolean;
|
||||
readonly items: readonly LexicalSearchItem[];
|
||||
}
|
||||
|
||||
type LexicalSearchItem = {
|
||||
readonly path: string;
|
||||
readonly repository: {
|
||||
readonly full_name: string;
|
||||
};
|
||||
readonly text_matches?: readonly {
|
||||
readonly fragment: string;
|
||||
readonly matches: readonly { readonly text: string; readonly indices: readonly [number, number] }[];
|
||||
readonly object_type: string;
|
||||
readonly property: string;
|
||||
}[];
|
||||
readonly score: number;
|
||||
};
|
||||
|
||||
export async function parseLexicalSearchResponse(body: LexicalSearchResponseShape, scope: GithubCodeSearchScope & { skipVerifyRepo?: boolean }, options: CodeSearchOptions & { skipVerifyRepo?: boolean }, ignoreService: IIgnoreService): Promise<LexicalCodeSearchResult> {
|
||||
const outChunks: FileChunk[] = [];
|
||||
|
||||
await Promise.all(body.items.map(async (item): Promise<void> => {
|
||||
if (!options.skipVerifyRepo && scope.kind === 'repo' && item.repository.full_name.toLowerCase() !== toGithubNwo(scope.githubRepoId)) {
|
||||
return;
|
||||
}
|
||||
if (!options.skipVerifyRepo && scope.kind === 'org' && item.repository.full_name.toLowerCase().split('/')[0] !== scope.org.toLowerCase()) {
|
||||
return;
|
||||
}
|
||||
|
||||
const localRepoRoot = scope.kind === 'repo' ? scope.localRepoRoot : undefined;
|
||||
let fileUri: URI;
|
||||
if (localRepoRoot) {
|
||||
fileUri = URI.joinPath(localRepoRoot, item.path);
|
||||
if (await ignoreService.isCopilotIgnored(fileUri)) {
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
fileUri = URI.from({
|
||||
scheme: 'githubRepoResult',
|
||||
path: '/' + item.repository.full_name + '/' + item.path
|
||||
});
|
||||
}
|
||||
|
||||
if (!shouldInclude(fileUri, options.globPatterns)) {
|
||||
return;
|
||||
}
|
||||
|
||||
const textMatches = item.text_matches?.filter(m => m.property === 'content');
|
||||
if (textMatches && textMatches.length > 0) {
|
||||
for (const match of textMatches) {
|
||||
outChunks.push({
|
||||
file: fileUri,
|
||||
text: match.fragment,
|
||||
rawText: undefined,
|
||||
range: new Range(0, 0, 0, 0),
|
||||
isFullFile: false,
|
||||
});
|
||||
}
|
||||
} else {
|
||||
// No text matches, include the file as a whole-file result
|
||||
outChunks.push({
|
||||
file: fileUri,
|
||||
text: '',
|
||||
rawText: undefined,
|
||||
range: new Range(0, 0, 0, 0),
|
||||
isFullFile: true,
|
||||
});
|
||||
}
|
||||
}));
|
||||
|
||||
return { chunks: outChunks, outOfSync: false };
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user