From bd95a8d4516d590256eefecfce465a2f10e8f5e5 Mon Sep 17 00:00:00 2001 From: Jackson Kearl Date: Wed, 20 Apr 2022 20:38:20 -0700 Subject: [PATCH] Implement language detection for notebook cells (#147537) * Implement language detection for notebook cells * Add lightbulb for normal text editors as well Clean up mapping of language id's in neural model Add config to opt out of language detection hints --- .../browser/parts/editor/editorStatus.ts | 2 +- .../browser/workbench.contribution.ts | 13 ++ .../browser/languageDetection.contribution.ts | 148 ++++++++++++++++++ .../cellStatusBar/statusBarProviders.ts | 74 ++++++++- .../browser/controller/editActions.ts | 66 +++++--- .../notebook/browser/notebookBrowser.ts | 1 + .../browser/languageDetectionSimpleWorker.ts | 51 +++--- .../languageDetectionWorkerServiceImpl.ts | 58 ++++--- .../common/languageDetectionWorkerService.ts | 3 +- src/vs/workbench/workbench.common.main.ts | 3 + 10 files changed, 358 insertions(+), 61 deletions(-) create mode 100644 src/vs/workbench/contrib/languageDetection/browser/languageDetection.contribution.ts diff --git a/src/vs/workbench/browser/parts/editor/editorStatus.ts b/src/vs/workbench/browser/parts/editor/editorStatus.ts index 92091e6054d..7571481ae97 100644 --- a/src/vs/workbench/browser/parts/editor/editorStatus.ts +++ b/src/vs/workbench/browser/parts/editor/editorStatus.ts @@ -1202,7 +1202,7 @@ export class ChangeLanguageAction extends Action { if (resource) { // Detect languages since we are in an untitled file let languageId: string | undefined = withNullAsUndefined(this.languageService.guessLanguageIdByFilepathOrFirstLine(resource, textModel.getLineContent(1))); - if (!languageId) { + if (!languageId || languageId === 'unknown') { detectedLanguage = await this.languageDetectionService.detectLanguage(resource); languageId = detectedLanguage; } diff --git a/src/vs/workbench/browser/workbench.contribution.ts b/src/vs/workbench/browser/workbench.contribution.ts index 77d6942b08a..903ca298a14 100644 --- a/src/vs/workbench/browser/workbench.contribution.ts +++ b/src/vs/workbench/browser/workbench.contribution.ts @@ -111,6 +111,19 @@ const registry = Registry.as(ConfigurationExtensions.Con tags: ['experimental'], description: localize('workbench.editor.preferBasedLanguageDetection', "When enabled, a language detection model that takes into account editor history will be given higher precedence."), }, + 'workbench.editor.languageDetectionHints': { + type: 'string', + default: 'always', + tags: ['experimental'], + enum: ['always', 'notebookEditors', 'textEditors', 'never'], + description: localize('workbench.editor.showLanguageDetectionHints', "When enabled, shows a status bar quick fix when the editor language doesn't match detected content language."), + enumDescriptions: [ + localize('workbench.editor.showLanguageDetectionHints.always', "Show show language detection quick fixes in both notebooks and untitled editors"), + localize('workbench.editor.showLanguageDetectionHints.notebook', "Only show language detection quick fixes in notebooks"), + localize('workbench.editor.showLanguageDetectionHints.editors', "Only show language detection quick fixes in untitled editors"), + localize('workbench.editor.showLanguageDetectionHints.never', "Never show language quick fixes"), + ] + }, 'workbench.editor.tabCloseButton': { 'type': 'string', 'enum': ['left', 'right', 'off'], diff --git a/src/vs/workbench/contrib/languageDetection/browser/languageDetection.contribution.ts b/src/vs/workbench/contrib/languageDetection/browser/languageDetection.contribution.ts new file mode 100644 index 00000000000..3df19dec7a5 --- /dev/null +++ b/src/vs/workbench/contrib/languageDetection/browser/languageDetection.contribution.ts @@ -0,0 +1,148 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +import { DisposableStore } from 'vs/base/common/lifecycle'; +import { getCodeEditor } from 'vs/editor/browser/editorBrowser'; +import { localize } from 'vs/nls'; +import { Registry } from 'vs/platform/registry/common/platform'; +import { IWorkbenchContributionsRegistry, Extensions as WorkbenchExtensions, IWorkbenchContribution } from 'vs/workbench/common/contributions'; +import { IEditorService } from 'vs/workbench/services/editor/common/editorService'; +import { LifecyclePhase } from 'vs/workbench/services/lifecycle/common/lifecycle'; +import { IStatusbarEntry, IStatusbarEntryAccessor, IStatusbarService, StatusbarAlignment } from 'vs/workbench/services/statusbar/browser/statusbar'; +import { ILanguageDetectionService } from 'vs/workbench/services/languageDetection/common/languageDetectionWorkerService'; +import { ThrottledDelayer } from 'vs/base/common/async'; +import { ILanguageService } from 'vs/editor/common/languages/language'; +import { IKeybindingService } from 'vs/platform/keybinding/common/keybinding'; +import { ServicesAccessor } from 'vs/editor/browser/editorExtensions'; +import { registerAction2, Action2 } from 'vs/platform/actions/common/actions'; +import { INotificationService } from 'vs/platform/notification/common/notification'; +import { ContextKeyExpr } from 'vs/platform/contextkey/common/contextkey'; +import { KeybindingWeight } from 'vs/platform/keybinding/common/keybindingsRegistry'; +import { NOTEBOOK_EDITOR_EDITABLE } from 'vs/workbench/contrib/notebook/common/notebookContextKeys'; +import { KeyCode, KeyMod } from 'vs/base/common/keyCodes'; +import { EditorContextKeys } from 'vs/editor/common/editorContextKeys'; +import { Schemas } from 'vs/base/common/network'; +import { IConfigurationService } from 'vs/platform/configuration/common/configuration'; + +const detectLanguageCommandId = 'editor.detectLanguage'; + +class LanguageDetectionStatusContribution implements IWorkbenchContribution { + + private static readonly _id = 'status.languageDetectionStatus'; + + private readonly _disposables = new DisposableStore(); + private _combinedEntry?: IStatusbarEntryAccessor; + private _delayer = new ThrottledDelayer(1000); + private _renderDisposables = new DisposableStore(); + + constructor( + @ILanguageDetectionService private readonly _languageDetectionService: ILanguageDetectionService, + @IStatusbarService private readonly _statusBarService: IStatusbarService, + @IConfigurationService private readonly _configurationService: IConfigurationService, + @IEditorService private readonly _editorService: IEditorService, + @ILanguageService private readonly _languageService: ILanguageService, + @IKeybindingService private readonly _keybindingService: IKeybindingService, + ) { + _editorService.onDidActiveEditorChange(() => this._update(true), this, this._disposables); + this._update(false); + } + + dispose(): void { + this._disposables.dispose(); + this._delayer.dispose(); + this._combinedEntry?.dispose(); + this._renderDisposables.dispose(); + } + + private _update(clear: boolean): void { + if (clear) { + this._combinedEntry?.dispose(); + this._combinedEntry = undefined; + } + this._delayer.trigger(() => this._doUpdate()); + } + + private async _doUpdate(): Promise { + const editor = getCodeEditor(this._editorService.activeTextEditorControl); + + this._renderDisposables.clear(); + + // update when editor language changes + editor?.onDidChangeModelLanguage(() => this._update(true), this, this._renderDisposables); + editor?.onDidChangeModelContent(() => this._update(false), this, this._renderDisposables); + const editorModel = editor?.getModel(); + const editorUri = editorModel?.uri; + const existingId = editorModel?.getLanguageId(); + const enablementConfig = this._configurationService.getValue('workbench.editor.languageDetectionHints'); + const enabled = enablementConfig === 'always' || enablementConfig === 'textEditors'; + const disableLightbulb = !enabled || editorUri?.scheme !== Schemas.untitled || !existingId; + + if (disableLightbulb || !editorUri) { + this._combinedEntry?.dispose(); + this._combinedEntry = undefined; + } else { + const lang = await this._languageDetectionService.detectLanguage(editorUri); + const skip: Record = { 'jsonc': 'json' }; + const existing = editorModel.getLanguageId(); + if (lang && lang !== existing && skip[existing] !== lang) { + const detectedName = this._languageService.getLanguageName(lang) || lang; + let tooltip = localize('status.autoDetectLanguage', "Accept Detected Language: {0}", detectedName); + const keybinding = this._keybindingService.lookupKeybinding(detectLanguageCommandId); + const label = keybinding?.getLabel(); + if (label) { + tooltip += ` (${label})`; + } + + const props: IStatusbarEntry = { + name: localize('langDetection.name', "Language Detection"), + ariaLabel: localize('langDetection.aria', "Change to Detected Language: {0}", lang), + tooltip, + command: detectLanguageCommandId, + text: '$(lightbulb-autofix)', + }; + if (!this._combinedEntry) { + this._combinedEntry = this._statusBarService.addEntry(props, LanguageDetectionStatusContribution._id, StatusbarAlignment.RIGHT, { id: 'status.editor.mode', alignment: StatusbarAlignment.RIGHT, compact: true }); + } else { + this._combinedEntry.update(props); + } + } else { + this._combinedEntry?.dispose(); + this._combinedEntry = undefined; + } + } + } +} + +Registry.as(WorkbenchExtensions.Workbench).registerWorkbenchContribution(LanguageDetectionStatusContribution, LifecyclePhase.Restored); + + +registerAction2(class extends Action2 { + + constructor() { + super({ + id: detectLanguageCommandId, + title: localize('detectlang', 'Detect Language from Content'), + f1: true, + precondition: ContextKeyExpr.and(NOTEBOOK_EDITOR_EDITABLE.toNegated(), EditorContextKeys.editorTextFocus), + keybinding: { primary: KeyCode.KeyE | KeyMod.CtrlCmd, weight: KeybindingWeight.WorkbenchContrib } + }); + } + + async run(accessor: ServicesAccessor): Promise { + const editorService = accessor.get(IEditorService); + const languageDetectionService = accessor.get(ILanguageDetectionService); + const editor = getCodeEditor(editorService.activeTextEditorControl); + const notificationService = accessor.get(INotificationService); + const editorUri = editor?.getModel()?.uri; + if (editorUri) { + const lang = await languageDetectionService.detectLanguage(editorUri); + if (lang) { + editor.getModel()?.setMode(lang); + } else { + notificationService.warn(localize('noDetection', "Unable to detect editor language")); + } + } + } +}); diff --git a/src/vs/workbench/contrib/notebook/browser/contrib/cellStatusBar/statusBarProviders.ts b/src/vs/workbench/contrib/notebook/browser/contrib/cellStatusBar/statusBarProviders.ts index f1834e4ee96..3747a011ae7 100644 --- a/src/vs/workbench/contrib/notebook/browser/contrib/cellStatusBar/statusBarProviders.ts +++ b/src/vs/workbench/contrib/notebook/browser/contrib/cellStatusBar/statusBarProviders.ts @@ -3,18 +3,23 @@ * Licensed under the MIT License. See License.txt in the project root for license information. *--------------------------------------------------------------------------------------------*/ +import { Delayer } from 'vs/base/common/async'; import { CancellationToken } from 'vs/base/common/cancellation'; import { Disposable } from 'vs/base/common/lifecycle'; import { URI } from 'vs/base/common/uri'; import { ILanguageService } from 'vs/editor/common/languages/language'; import { localize } from 'vs/nls'; +import { IConfigurationService } from 'vs/platform/configuration/common/configuration'; import { IInstantiationService } from 'vs/platform/instantiation/common/instantiation'; +import { IKeybindingService } from 'vs/platform/keybinding/common/keybinding'; import { Registry } from 'vs/platform/registry/common/platform'; import { Extensions as WorkbenchExtensions, IWorkbenchContributionsRegistry } from 'vs/workbench/common/contributions'; -import { CHANGE_CELL_LANGUAGE } from 'vs/workbench/contrib/notebook/browser/notebookBrowser'; +import { CHANGE_CELL_LANGUAGE, DETECT_CELL_LANGUAGE } from 'vs/workbench/contrib/notebook/browser/notebookBrowser'; import { INotebookCellStatusBarService } from 'vs/workbench/contrib/notebook/common/notebookCellStatusBarService'; import { CellKind, CellStatusbarAlignment, INotebookCellStatusBarItem, INotebookCellStatusBarItemList, INotebookCellStatusBarItemProvider } from 'vs/workbench/contrib/notebook/common/notebookCommon'; +import { INotebookKernelService } from 'vs/workbench/contrib/notebook/common/notebookKernelService'; import { INotebookService } from 'vs/workbench/contrib/notebook/common/notebookService'; +import { ILanguageDetectionService } from 'vs/workbench/services/languageDetection/common/languageDetectionWorkerService'; import { LifecyclePhase } from 'vs/workbench/services/lifecycle/common/lifecycle'; class CellStatusBarLanguagePickerProvider implements INotebookCellStatusBarItemProvider { @@ -50,6 +55,72 @@ class CellStatusBarLanguagePickerProvider implements INotebookCellStatusBarItemP } } +class CellStatusBarLanguageDetectionProvider implements INotebookCellStatusBarItemProvider { + + readonly viewType = '*'; + + private delayer = new Delayer(500); + + constructor( + @INotebookService private readonly _notebookService: INotebookService, + @INotebookKernelService private readonly _notebookKernelService: INotebookKernelService, + @ILanguageService private readonly _languageService: ILanguageService, + @IConfigurationService private readonly _configurationService: IConfigurationService, + @ILanguageDetectionService private readonly _languageDetectionService: ILanguageDetectionService, + @IKeybindingService private readonly _keybindingService: IKeybindingService, + ) { } + + async provideCellStatusBarItems(uri: URI, index: number, token: CancellationToken): Promise { + return await this.delayer.trigger(async () => { + const doc = this._notebookService.getNotebookTextModel(uri); + const cell = doc?.cells[index]; + if (!cell || token.isCancellationRequested) { + return; + } + + const enablementConfig = this._configurationService.getValue('workbench.editor.languageDetectionHints'); + const enabled = enablementConfig === 'always' || enablementConfig === 'notebookEditors'; + if (!enabled) { + return; + } + + const currentLanguageId = cell.cellKind === CellKind.Markup ? + 'markdown' : + (this._languageService.getLanguageIdByLanguageName(cell.language) || cell.language); + + const kernel = this._notebookKernelService.getMatchingKernel(doc); + const items: INotebookCellStatusBarItem[] = []; + + if (kernel.selected) { + const availableLangs = []; + availableLangs.push(...kernel.selected.supportedLanguages, 'markdown'); + const detectedLanguageId = await this._languageDetectionService.detectLanguage(cell.uri, availableLangs); + + if (detectedLanguageId && currentLanguageId !== detectedLanguageId) { + const detectedName = this._languageService.getLanguageName(detectedLanguageId) || detectedLanguageId; + let tooltip = localize('notebook.cell.status.autoDetectLanguage', "Accept Detected Language: {0}", detectedName); + const keybinding = this._keybindingService.lookupKeybinding(DETECT_CELL_LANGUAGE); + const label = keybinding?.getLabel(); + if (label) { + tooltip += ` (${label})`; + } + + items.push({ + text: '$(lightbulb-autofix)', + command: DETECT_CELL_LANGUAGE, + tooltip, + alignment: CellStatusbarAlignment.Right, + priority: -Number.MAX_SAFE_INTEGER + 1 + }); + } + } + + return { items }; + }); + + } +} + class BuiltinCellStatusBarProviders extends Disposable { constructor( @IInstantiationService instantiationService: IInstantiationService, @@ -58,6 +129,7 @@ class BuiltinCellStatusBarProviders extends Disposable { const builtinProviders = [ CellStatusBarLanguagePickerProvider, + CellStatusBarLanguageDetectionProvider, ]; builtinProviders.forEach(p => { this._register(notebookCellStatusBarService.registerCellStatusBarItemProvider(instantiationService.createInstance(p))); diff --git a/src/vs/workbench/contrib/notebook/browser/controller/editActions.ts b/src/vs/workbench/contrib/notebook/browser/controller/editActions.ts index 588675f6403..27a1c5b7c21 100644 --- a/src/vs/workbench/contrib/notebook/browser/controller/editActions.ts +++ b/src/vs/workbench/contrib/notebook/browser/controller/editActions.ts @@ -21,12 +21,13 @@ import { IQuickInputService, IQuickPickItem, QuickPickInput } from 'vs/platform/ import { changeCellToKind, runDeleteAction } from 'vs/workbench/contrib/notebook/browser/controller/cellOperations'; import { CellToolbarOrder, CELL_TITLE_CELL_GROUP_ID, CELL_TITLE_OUTPUT_GROUP_ID, executeNotebookCondition, INotebookActionContext, INotebookCellActionContext, NotebookAction, NotebookCellAction, NOTEBOOK_EDITOR_WIDGET_ACTION_WEIGHT } from 'vs/workbench/contrib/notebook/browser/controller/coreActions'; import { NOTEBOOK_CELL_EDITABLE, NOTEBOOK_CELL_HAS_OUTPUTS, NOTEBOOK_CELL_LIST_FOCUSED, NOTEBOOK_CELL_MARKDOWN_EDIT_MODE, NOTEBOOK_CELL_TYPE, NOTEBOOK_EDITOR_EDITABLE, NOTEBOOK_EDITOR_FOCUSED, NOTEBOOK_HAS_OUTPUTS, NOTEBOOK_IS_ACTIVE_EDITOR, NOTEBOOK_USE_CONSOLIDATED_OUTPUT_BUTTON } from 'vs/workbench/contrib/notebook/common/notebookContextKeys'; -import { CellEditState, CHANGE_CELL_LANGUAGE, QUIT_EDIT_CELL_COMMAND_ID } from 'vs/workbench/contrib/notebook/browser/notebookBrowser'; +import { CellEditState, CHANGE_CELL_LANGUAGE, DETECT_CELL_LANGUAGE, QUIT_EDIT_CELL_COMMAND_ID } from 'vs/workbench/contrib/notebook/browser/notebookBrowser'; import * as icons from 'vs/workbench/contrib/notebook/browser/notebookIcons'; import { CellEditType, CellKind, ICellEditOperation, NotebookCellExecutionState } from 'vs/workbench/contrib/notebook/common/notebookCommon'; import { ICellRange } from 'vs/workbench/contrib/notebook/common/notebookRange'; import { ILanguageDetectionService } from 'vs/workbench/services/languageDetection/common/languageDetectionWorkerService'; import { INotebookExecutionStateService } from 'vs/workbench/contrib/notebook/common/notebookExecutionStateService'; +import { INotificationService } from 'vs/platform/notification/common/notification'; const CLEAR_ALL_CELLS_OUTPUTS_COMMAND_ID = 'notebook.clearAllCellsOutputs'; const EDIT_CELL_COMMAND_ID = 'notebook.cell.edit'; @@ -437,23 +438,7 @@ registerAction2(class ChangeCellLanguageAction extends NotebookCellAction undefined, undefined, true - ); - } + await setCellToLanguage(languageId, context); } /** @@ -478,3 +463,48 @@ registerAction2(class ChangeCellLanguageAction extends NotebookCellAction { + const languageDetectionService = accessor.get(ILanguageDetectionService); + const notificationService = accessor.get(INotificationService); + const providerLanguages = [...context.notebookEditor.activeKernel?.supportedLanguages ?? []]; + providerLanguages.push('markdown'); + const detection = await languageDetectionService.detectLanguage(context.cell.uri, providerLanguages); + if (detection) { + setCellToLanguage(detection, context); + } else { + notificationService.warn(localize('noDetection', "Unable to detect cell language")); + } + } +}); + +async function setCellToLanguage(languageId: string, context: IChangeCellContext) { + if (languageId === 'markdown' && context.cell?.language !== 'markdown') { + const idx = context.notebookEditor.getCellIndex(context.cell); + await changeCellToKind(CellKind.Markup, { cell: context.cell, notebookEditor: context.notebookEditor, ui: true }, 'markdown', Mimes.markdown); + const newCell = context.notebookEditor.cellAt(idx); + + if (newCell) { + context.notebookEditor.focusNotebookCell(newCell, 'editor'); + } + } else if (languageId !== 'markdown' && context.cell?.cellKind === CellKind.Markup) { + await changeCellToKind(CellKind.Code, { cell: context.cell, notebookEditor: context.notebookEditor, ui: true }, languageId); + } else { + const index = context.notebookEditor.textModel.cells.indexOf(context.cell.model); + context.notebookEditor.textModel.applyEdits( + [{ editType: CellEditType.CellLanguage, index, language: languageId }], + true, undefined, () => undefined, undefined, true + ); + } +} diff --git a/src/vs/workbench/contrib/notebook/browser/notebookBrowser.ts b/src/vs/workbench/contrib/notebook/browser/notebookBrowser.ts index 7153bcfe26c..34c9a2afe21 100644 --- a/src/vs/workbench/contrib/notebook/browser/notebookBrowser.ts +++ b/src/vs/workbench/contrib/notebook/browser/notebookBrowser.ts @@ -31,6 +31,7 @@ import { IEditorOptions } from 'vs/editor/common/config/editorOptions'; //#region Shared commands export const EXPAND_CELL_INPUT_COMMAND_ID = 'notebook.cell.expandCellInput'; export const EXECUTE_CELL_COMMAND_ID = 'notebook.cell.execute'; +export const DETECT_CELL_LANGUAGE = 'notebook.cell.detectLanguage'; export const CHANGE_CELL_LANGUAGE = 'notebook.cell.changeLanguage'; export const QUIT_EDIT_CELL_COMMAND_ID = 'notebook.cell.quitEdit'; export const EXPAND_CELL_OUTPUT_COMMAND_ID = 'notebook.cell.expandCellOutput'; diff --git a/src/vs/workbench/services/languageDetection/browser/languageDetectionSimpleWorker.ts b/src/vs/workbench/services/languageDetection/browser/languageDetectionSimpleWorker.ts index 2d9166e9ed5..59d8bf7ef8b 100644 --- a/src/vs/workbench/services/languageDetection/browser/languageDetectionSimpleWorker.ts +++ b/src/vs/workbench/services/languageDetection/browser/languageDetectionSimpleWorker.ts @@ -9,7 +9,7 @@ import { IRequestHandler } from 'vs/base/common/worker/simpleWorker'; import { EditorSimpleWorker } from 'vs/editor/common/services/editorSimpleWorker'; import { IEditorWorkerHost } from 'vs/editor/common/services/editorWorkerHost'; -type RegexpModel = { detect: (inp: string, langBiases: Record) => string | undefined }; +type RegexpModel = { detect: (inp: string, langBiases: Record, supportedLangs?: string[]) => string | undefined }; /** * Called on the worker side @@ -34,7 +34,9 @@ export class LanguageDetectionSimpleWorker extends EditorSimpleWorker { private _modelOperations: ModelOperations | undefined; private _loadFailed: boolean = false; - public async detectLanguage(uri: string, langBiases: Record | undefined, preferHistory: boolean): Promise { + private modelIdToCoreId = new Map(); + + public async detectLanguage(uri: string, langBiases: Record | undefined, preferHistory: boolean, supportedLangs?: string[]): Promise { const languages: string[] = []; const confidences: number[] = []; const stopWatch = new StopWatch(true); @@ -43,8 +45,14 @@ export class LanguageDetectionSimpleWorker extends EditorSimpleWorker { const neuralResolver = async () => { for await (const language of this.detectLanguagesImpl(documentTextSample)) { - languages.push(language.languageId); - confidences.push(language.confidence); + if (!this.modelIdToCoreId.has(language.languageId)) { + this.modelIdToCoreId.set(language.languageId, await this._host.fhr('getLanguageId', [language.languageId])); + } + const coreId = this.modelIdToCoreId.get(language.languageId); + if (coreId && (!supportedLangs?.length || supportedLangs.includes(coreId))) { + languages.push(coreId); + confidences.push(language.confidence); + } } stopWatch.stop(); @@ -55,15 +63,7 @@ export class LanguageDetectionSimpleWorker extends EditorSimpleWorker { return undefined; }; - const historicalResolver = async () => { - if (langBiases) { - const regexpDetection = await this.runRegexpModel(documentTextSample, langBiases); - if (regexpDetection) { - return regexpDetection; - } - } - return undefined; - }; + const historicalResolver = async () => this.runRegexpModel(documentTextSample, langBiases ?? {}, supportedLangs); if (preferHistory) { const history = await historicalResolver(); @@ -112,11 +112,22 @@ export class LanguageDetectionSimpleWorker extends EditorSimpleWorker { } } - private async runRegexpModel(content: string, langBiases: Record): Promise { + private async runRegexpModel(content: string, langBiases: Record, supportedLangs?: string[]): Promise { const regexpModel = await this.getRegexpModel(); if (!regexpModel) { return; } - const detected = regexpModel.detect(content, langBiases); + if (supportedLangs?.length) { + // When using supportedLangs, normally computed biases are too extreme. Just use a "bitmask" of sorts. + for (const lang of Object.keys(langBiases)) { + if (supportedLangs.includes(lang)) { + langBiases[lang] = 1; + } else { + langBiases[lang] = 0; + } + } + } + + const detected = regexpModel.detect(content, langBiases, supportedLangs); return detected; } @@ -156,21 +167,21 @@ export class LanguageDetectionSimpleWorker extends EditorSimpleWorker { // For the following languages, we increase the confidence because // these are commonly used languages in VS Code and supported // by the model. - case 'javascript': + case 'js': case 'html': case 'json': - case 'typescript': + case 'ts': case 'css': - case 'python': + case 'py': case 'xml': case 'php': modelResult.confidence += LanguageDetectionSimpleWorker.positiveConfidenceCorrectionBucket1; break; // case 'yaml': // YAML has been know to cause incorrect language detection because the language is pretty simple. We don't want to increase the confidence for this. case 'cpp': - case 'shellscript': + case 'sh': case 'java': - case 'csharp': + case 'cs': case 'c': modelResult.confidence += LanguageDetectionSimpleWorker.positiveConfidenceCorrectionBucket2; break; diff --git a/src/vs/workbench/services/languageDetection/browser/languageDetectionWorkerServiceImpl.ts b/src/vs/workbench/services/languageDetection/browser/languageDetectionWorkerServiceImpl.ts index eaa283b67d3..9db520ddcb0 100644 --- a/src/vs/workbench/services/languageDetection/browser/languageDetectionWorkerServiceImpl.ts +++ b/src/vs/workbench/services/languageDetection/browser/languageDetectionWorkerServiceImpl.ts @@ -53,7 +53,7 @@ export class LanguageDetectionService extends Disposable implements ILanguageDet constructor( @IWorkbenchEnvironmentService private readonly _environmentService: IWorkbenchEnvironmentService, - @ILanguageService private readonly _languageService: ILanguageService, + @ILanguageService languageService: ILanguageService, @IConfigurationService private readonly _configurationService: IConfigurationService, @IDiagnosticsService private readonly _diagnosticsService: IDiagnosticsService, @IWorkspaceContextService private readonly _workspaceContextService: IWorkspaceContextService, @@ -68,6 +68,7 @@ export class LanguageDetectionService extends Disposable implements ILanguageDet this._languageDetectionWorkerClient = new LanguageDetectionWorkerClient( modelService, + languageService, telemetryService, // TODO: See if it's possible to bundle vscode-languagedetection this._environmentService.isBuilt && !isWeb @@ -95,7 +96,7 @@ export class LanguageDetectionService extends Disposable implements ILanguageDet let count = 0; for (const ext of fileExtensions.extensions) { - const langId = this.getLanguageId(ext); + const langId = this._languageDetectionWorkerClient.getLanguageId(ext); if (langId && count < TOP_LANG_COUNTS) { this.workspaceLanguageIds.add(langId); count++; @@ -109,15 +110,6 @@ export class LanguageDetectionService extends Disposable implements ILanguageDet return !!languageId && this._configurationService.getValue(LanguageDetectionService.enablementSettingKey, { overrideIdentifier: languageId }); } - private getLanguageId(language: string | undefined): string | undefined { - if (!language) { - return undefined; - } - if (this._languageService.isRegisteredLanguageId(language)) { - return language; - } - return this._languageService.guessLanguageIdByFilepathOrFirstLine(URI.file(`file.${language}`)) ?? undefined; - } private getLanguageBiases(): Record { if (!this.dirtyBiases) { return this.langBiases; } @@ -147,19 +139,14 @@ export class LanguageDetectionService extends Disposable implements ILanguageDet return biases; } - async detectLanguage(resource: URI): Promise { + async detectLanguage(resource: URI, supportedLangs?: string[]): Promise { const useHistory = this._configurationService.getValue(LanguageDetectionService.historyBasedEnablementConfig); const preferHistory = this._configurationService.getValue(LanguageDetectionService.preferHistoryConfig); if (useHistory) { await this.resolveWorkspaceLanguageIds(); } const biases = useHistory ? this.getLanguageBiases() : undefined; - const language = await this._languageDetectionWorkerClient.detectLanguage(resource, biases, preferHistory); - - if (language) { - return this.getLanguageId(language); - } - return undefined; + return this._languageDetectionWorkerClient.detectLanguage(resource, biases, preferHistory, supportedLangs); } private initEditorOpenedListeners(storageService: IStorageService) { @@ -234,6 +221,7 @@ export class LanguageDetectionWorkerClient extends EditorWorkerClient { constructor( modelService: IModelService, + private readonly _languageService: ILanguageService, private readonly _telemetryService: ITelemetryService, private readonly _indexJsUri: string, private readonly _modelJsonUri: string, @@ -260,6 +248,14 @@ export class LanguageDetectionWorkerClient extends EditorWorkerClient { return this.workerPromise; } + private _guessLanguageIdByUri(uri: URI): string | undefined { + const guess = this._languageService.guessLanguageIdByFilepathOrFirstLine(uri); + if (guess && guess !== 'unknown') { + return guess; + } + return undefined; + } + override async _getProxy(): Promise { return (await this._getOrCreateLanguageDetectionWorker()).getProxyObject(); } @@ -275,6 +271,8 @@ export class LanguageDetectionWorkerClient extends EditorWorkerClient { return this.getWeightsUri(); case 'getRegexpModelUri': return this.getRegexpModelUri(); + case 'getLanguageId': + return this.getLanguageId(args[0]); case 'sendTelemetryEvent': return this.sendTelemetryEvent(args[0], args[1], args[2]); default: @@ -286,6 +284,20 @@ export class LanguageDetectionWorkerClient extends EditorWorkerClient { return this._indexJsUri; } + getLanguageId(languageIdOrExt: string | undefined) { + if (!languageIdOrExt) { + return undefined; + } + if (this._languageService.isRegisteredLanguageId(languageIdOrExt)) { + return languageIdOrExt; + } + const guessed = this._guessLanguageIdByUri(URI.file(`file.${languageIdOrExt}`)); + if (!guessed || guessed === 'unknown') { + return undefined; + } + return guessed; + } + async getModelJsonUri() { return this._modelJsonUri; } @@ -306,9 +318,15 @@ export class LanguageDetectionWorkerClient extends EditorWorkerClient { }); } - public async detectLanguage(resource: URI, langBiases: Record | undefined, preferHistory: boolean): Promise { + public async detectLanguage(resource: URI, langBiases: Record | undefined, preferHistory: boolean, supportedLangs?: string[]): Promise { + const quickGuess = this._guessLanguageIdByUri(resource); + if (quickGuess) { + return quickGuess; + } + await this._withSyncedResources([resource]); - return (await this._getProxy()).detectLanguage(resource.toString(), langBiases, preferHistory); + const modelId = await (await this._getProxy()).detectLanguage(resource.toString(), langBiases, preferHistory, supportedLangs); + return this.getLanguageId(modelId); } } diff --git a/src/vs/workbench/services/languageDetection/common/languageDetectionWorkerService.ts b/src/vs/workbench/services/languageDetection/common/languageDetectionWorkerService.ts index 2b725466178..0fe24c9c6fd 100644 --- a/src/vs/workbench/services/languageDetection/common/languageDetectionWorkerService.ts +++ b/src/vs/workbench/services/languageDetection/common/languageDetectionWorkerService.ts @@ -19,9 +19,10 @@ export interface ILanguageDetectionService { /** * @param resource The resource to detect the language for. + * @param supportedLangs Optional. When populated, the model will only return languages from the provided list * @returns the language id for the given resource or undefined if the model is not confident enough. */ - detectLanguage(resource: URI): Promise; + detectLanguage(resource: URI, supportedLangs?: string[]): Promise; } //#region Telemetry events diff --git a/src/vs/workbench/workbench.common.main.ts b/src/vs/workbench/workbench.common.main.ts index 295189cbf37..54fa8d93406 100644 --- a/src/vs/workbench/workbench.common.main.ts +++ b/src/vs/workbench/workbench.common.main.ts @@ -303,6 +303,9 @@ import 'vs/workbench/contrib/typeHierarchy/browser/typeHierarchy.contribution'; import 'vs/workbench/contrib/codeEditor/browser/outline/documentSymbolsOutline'; import 'vs/workbench/contrib/outline/browser/outline.contribution'; +// Language Detection +import 'vs/workbench/contrib/languageDetection/browser/languageDetection.contribution'; + // Language Status import 'vs/workbench/contrib/languageStatus/browser/languageStatus.contribution';