diff --git a/build/lib/i18n.resources.json b/build/lib/i18n.resources.json index f83225cc974..15a06c1a021 100644 --- a/build/lib/i18n.resources.json +++ b/build/lib/i18n.resources.json @@ -78,6 +78,10 @@ "name": "vs/workbench/services/assignment", "project": "vscode-workbench" }, + { + "name": "vs/workbench/services/voiceRecognition", + "project": "vscode-workbench" + }, { "name": "vs/workbench/contrib/extensions", "project": "vscode-workbench" diff --git a/src/vs/code/electron-main/app.ts b/src/vs/code/electron-main/app.ts index bd6fb95a75d..9c79498b8f5 100644 --- a/src/vs/code/electron-main/app.ts +++ b/src/vs/code/electron-main/app.ts @@ -162,24 +162,36 @@ export class CodeApplication extends Disposable { const isUrlFromWebview = (requestingUrl: string | undefined) => requestingUrl?.startsWith(`${Schemas.vscodeWebview}://`); + const allowedPermissionsInMainFrame = new Set([ + 'media' + ]); + const allowedPermissionsInWebview = new Set([ 'clipboard-read', 'clipboard-sanitized-write', ]); - session.defaultSession.setPermissionRequestHandler((_webContents, permission /* 'media' | 'geolocation' | 'notifications' | 'midiSysex' | 'pointerLock' | 'fullscreen' | 'openExternal' */, callback, details) => { + session.defaultSession.setPermissionRequestHandler((_webContents, permission, callback, details) => { if (isUrlFromWebview(details.requestingUrl)) { return callback(allowedPermissionsInWebview.has(permission)); } + if (details.isMainFrame && details.securityOrigin === 'vscode-file://vscode-app/') { + return callback(allowedPermissionsInMainFrame.has(permission)); + } + return callback(false); }); - session.defaultSession.setPermissionCheckHandler((_webContents, permission /* 'media' */, _origin, details) => { + session.defaultSession.setPermissionCheckHandler((_webContents, permission, _origin, details) => { if (isUrlFromWebview(details.requestingUrl)) { return allowedPermissionsInWebview.has(permission); } + if (details.isMainFrame && details.securityOrigin === 
'vscode-file://vscode-app/') { + return allowedPermissionsInMainFrame.has(permission); + } + return false; }); diff --git a/src/vs/code/node/sharedProcess/sharedProcessMain.ts b/src/vs/code/node/sharedProcess/sharedProcessMain.ts index f911b0da431..7092cc2b8ec 100644 --- a/src/vs/code/node/sharedProcess/sharedProcessMain.ts +++ b/src/vs/code/node/sharedProcess/sharedProcessMain.ts @@ -112,6 +112,8 @@ import { IRemoteSocketFactoryService, RemoteSocketFactoryService } from 'vs/plat import { RemoteConnectionType } from 'vs/platform/remote/common/remoteAuthorityResolver'; import { nodeSocketFactory } from 'vs/platform/remote/node/nodeSocketFactory'; import { NativeEnvironmentService } from 'vs/platform/environment/node/environmentService'; +import { IVoiceRecognitionService } from 'vs/platform/voiceRecognition/common/voiceRecognitionService'; +import { VoiceRecognitionService } from 'vs/platform/voiceRecognition/node/voiceRecognitionService'; class SharedProcessMain extends Disposable { @@ -351,6 +353,9 @@ class SharedProcessMain extends Disposable { // Remote Tunnel services.set(IRemoteTunnelService, new SyncDescriptor(RemoteTunnelService)); + // Voice Recognition + services.set(IVoiceRecognitionService, new SyncDescriptor(VoiceRecognitionService, undefined, false /* proxied to other processes */)); + return new InstantiationService(services); } @@ -408,6 +413,10 @@ class SharedProcessMain extends Disposable { // Remote Tunnel const remoteTunnelChannel = ProxyChannel.fromService(accessor.get(IRemoteTunnelService)); this.server.registerChannel('remoteTunnel', remoteTunnelChannel); + + // Voice Recognition + const voiceRecognitionChannel = ProxyChannel.fromService(accessor.get(IVoiceRecognitionService)); + this.server.registerChannel('voiceRecognition', voiceRecognitionChannel); } private registerErrorHandler(logService: ILogService): void { diff --git a/src/vs/platform/voiceRecognition/common/voiceRecognitionService.ts 
b/src/vs/platform/voiceRecognition/common/voiceRecognitionService.ts
new file mode 100644
index 00000000000..0cc3310df97
--- /dev/null
+++ b/src/vs/platform/voiceRecognition/common/voiceRecognitionService.ts
@@ -0,0 +1,44 @@
+/*---------------------------------------------------------------------------------------------
+ * Copyright (c) Microsoft Corporation. All rights reserved.
+ * Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+import { VSFloat32Array } from 'vs/base/common/buffer';
+import { createDecorator } from 'vs/platform/instantiation/common/instantiation';
+
+export const IVoiceRecognitionService = createDecorator<IVoiceRecognitionService>('voiceRecognitionService');
+
+/**
+ * Audio payload accepted by `IVoiceRecognitionService.transcribe`.
+ * The literal types restrict it to mono audio sampled at 16 kHz.
+ */
+export interface IAudioBuffer {
+
+	/** Sample rate in Hz. */
+	readonly sampleRate: 16000;
+
+	/** Number of audio channels (mono). */
+	readonly channelCount: 1;
+
+	/** Number of samples in the buffer. */
+	readonly length: number;
+
+	/** The samples of the single channel. */
+	readonly channelData: VSFloat32Array;
+}
+
+export interface IVoiceRecognitionService {
+
+	readonly _serviceBrand: undefined;
+
+	/**
+	 * Given a buffer of audio data, attempts to
+	 * transcribe the spoken words into text.
+	 *
+	 * @param buffer the audio data obtained from
+	 * the microphone as PCM 32-bit float mono at
+	 * 16 kHz.
+	 * @returns a promise that resolves to the transcribed text.
+	 */
+	transcribe(buffer: IAudioBuffer): Promise<string>;
+}
diff --git a/src/vs/platform/voiceRecognition/node/voiceRecognitionService.ts b/src/vs/platform/voiceRecognition/node/voiceRecognitionService.ts
new file mode 100644
index 00000000000..cb57192e299
--- /dev/null
+++ b/src/vs/platform/voiceRecognition/node/voiceRecognitionService.ts
@@ -0,0 +1,38 @@
+/*---------------------------------------------------------------------------------------------
+ * Copyright (c) Microsoft Corporation. All rights reserved.
+ * Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+import { ILogService } from 'vs/platform/log/common/log';
+import { IAudioBuffer, IVoiceRecognitionService } from 'vs/platform/voiceRecognition/common/voiceRecognitionService';
+
+/**
+ * Voice recognition that delegates the actual transcription to a module
+ * loaded via `require.__$__nodeRequire` from the path found in the
+ * `VSCODE_VOICE_MODULE_PATH` environment variable.
+ */
+export class VoiceRecognitionService implements IVoiceRecognitionService {
+
+	declare readonly _serviceBrand: undefined;
+
+	constructor(
+		@ILogService private readonly logService: ILogService
+	) { }
+
+	/**
+	 * Transcribes the provided audio by handing it to the voice module.
+	 *
+	 * @param buffer the audio data to transcribe.
+	 * @returns the text the voice module resolved with.
+	 * @throws if `VSCODE_VOICE_MODULE_PATH` is not set.
+	 */
+	async transcribe(buffer: IAudioBuffer): Promise<string> {
+		this.logService.info(`[voice] transcribe(${buffer.length}): Begin`);
+
+		const modulePath = process.env.VSCODE_VOICE_MODULE_PATH;
+		if (!modulePath) {
+			throw new Error('Voice recognition not yet supported!');
+		}
+
+		const voiceModule: { transcribe: (audioBuffer: { channelCount: 1; length: number; sampleRate: 16000; channelData: Float32Array }) => Promise<string> } = require.__$__nodeRequire(modulePath);
+
+		const text = await voiceModule.transcribe({
+			channelCount: buffer.channelCount,
+			length: buffer.length,
+			sampleRate: buffer.sampleRate,
+			// the module is typed to take a plain `Float32Array`, so unwrap it
+			channelData: buffer.channelData.buffer
+		});
+
+		this.logService.info(`[voice] transcribe(${buffer.length}): End (text: "${text}")`);
+
+		return text;
+	}
+}
diff --git a/src/vs/workbench/contrib/chat/electron-sandbox/chat.contribution.ts b/src/vs/workbench/contrib/chat/electron-sandbox/chat.contribution.ts
new file mode 100644
index 00000000000..0f0c9474e27
--- /dev/null
+++ b/src/vs/workbench/contrib/chat/electron-sandbox/chat.contribution.ts
@@ -0,0 +1,50 @@
+/*---------------------------------------------------------------------------------------------
+ * Copyright (c) Microsoft Corporation. All rights reserved.
+ * Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+import { toAction } from 'vs/base/common/actions';
+import { CancellationTokenSource } from 'vs/base/common/cancellation';
+import { DisposableStore, toDisposable } from 'vs/base/common/lifecycle';
+import { CommandsRegistry } from 'vs/platform/commands/common/commands';
+import { INotificationService, NotificationPriority, Severity } from 'vs/platform/notification/common/notification';
+import { IWorkbenchVoiceRecognitionService } from 'vs/workbench/services/voiceRecognition/electron-sandbox/workbenchVoiceRecognitionService';
+
+/**
+ * Disposables of the voice transcription session that is currently
+ * running, or `undefined` when no session is active.
+ */
+let activeVoiceTranscription: DisposableStore | undefined;
+
+/**
+ * Stops the running voice transcription session, if any, by disposing
+ * everything that was registered for it.
+ */
+function stopVoiceTranscription() {
+	// Clear the reference before disposing so that a re-entrant call
+	// triggered by one of the disposables is a no-op.
+	const session = activeVoiceTranscription;
+	activeVoiceTranscription = undefined;
+	session?.dispose();
+}
+
+/**
+ * Toggles voice transcription: starts a session that shows the
+ * transcribed text in a sticky notification, or stops the session
+ * that is currently running.
+ */
+CommandsRegistry.registerCommand('workbench.action.toggleVoiceTranscription', async services => {
+	if (activeVoiceTranscription) {
+		stopVoiceTranscription();
+		return;
+	}
+
+	const voiceRecognitionService = services.get(IWorkbenchVoiceRecognitionService);
+	const notificationService = services.get(INotificationService);
+
+	const session = activeVoiceTranscription = new DisposableStore();
+
+	// Cancelling the token is what stops the recording in the service
+	const cts = new CancellationTokenSource();
+	session.add(toDisposable(() => cts.dispose(true)));
+
+	const voiceTranscriptionNotification = notificationService.notify({
+		severity: Severity.Info,
+		priority: NotificationPriority.URGENT,
+		sticky: true,
+		message: 'Listening...',
+		actions: {
+			primary: [
+				toAction({ id: 'stopVoiceTranscription', label: 'Stop', run: () => stopVoiceTranscription() })
+			]
+		}
+	});
+
+	// The notification is the only indication that the microphone is in
+	// use: when it goes away by other means than "Stop", end the session
+	// as well. Registered before the `close()` disposable so that the
+	// listener is already gone when the session closes the notification.
+	session.add(voiceTranscriptionNotification.onDidClose(() => {
+		if (activeVoiceTranscription === session) {
+			stopVoiceTranscription();
+		}
+	}));
+
+	session.add(toDisposable(() => voiceTranscriptionNotification.close()));
+
+	session.add(voiceRecognitionService.transcribe(cts.token)(text => {
+		voiceTranscriptionNotification.updateMessage(text);
+	}));
+});
diff --git a/src/vs/workbench/services/voiceRecognition/electron-sandbox/voiceRecognitionService.ts
b/src/vs/workbench/services/voiceRecognition/electron-sandbox/voiceRecognitionService.ts
new file mode 100644
index 00000000000..e8b4e771ffa
--- /dev/null
+++ b/src/vs/workbench/services/voiceRecognition/electron-sandbox/voiceRecognitionService.ts
@@ -0,0 +1,9 @@
+/*---------------------------------------------------------------------------------------------
+ * Copyright (c) Microsoft Corporation. All rights reserved.
+ * Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+import { registerSharedProcessRemoteService } from 'vs/platform/ipc/electron-sandbox/services';
+import { IVoiceRecognitionService } from 'vs/platform/voiceRecognition/common/voiceRecognitionService';
+
+registerSharedProcessRemoteService(IVoiceRecognitionService, 'voiceRecognition'); // same channel name the shared process registers for this service
diff --git a/src/vs/workbench/services/voiceRecognition/electron-sandbox/workbenchVoiceRecognitionService.ts b/src/vs/workbench/services/voiceRecognition/electron-sandbox/workbenchVoiceRecognitionService.ts
new file mode 100644
index 00000000000..a98131381ce
--- /dev/null
+++ b/src/vs/workbench/services/voiceRecognition/electron-sandbox/workbenchVoiceRecognitionService.ts
@@ -0,0 +1,140 @@
+/*---------------------------------------------------------------------------------------------
+ * Copyright (c) Microsoft Corporation. All rights reserved.
+ * Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+import { localize } from 'vs/nls';
+import { VSFloat32Array } from 'vs/base/common/buffer';
+import { CancellationToken, CancellationTokenSource } from 'vs/base/common/cancellation';
+import { InstantiationType, registerSingleton } from 'vs/platform/instantiation/common/extensions';
+import { createDecorator } from 'vs/platform/instantiation/common/instantiation';
+import { IVoiceRecognitionService } from 'vs/platform/voiceRecognition/common/voiceRecognitionService';
+import { Emitter, Event } from 'vs/base/common/event';
+import { IProgressService, ProgressLocation } from 'vs/platform/progress/common/progress';
+import { DeferredPromise } from 'vs/base/common/async';
+
+export const IWorkbenchVoiceRecognitionService = createDecorator<IWorkbenchVoiceRecognitionService>('workbenchVoiceRecognitionService');
+
+export interface IWorkbenchVoiceRecognitionService {
+
+	readonly _serviceBrand: undefined;
+
+	/**
+	 * Starts listening to the microphone transcribing the voice to text.
+	 *
+	 * @param cancellation a cancellation token to stop transcribing and
+	 * listening to the microphone.
+	 * @returns an event that fires with the transcribed text.
+	 */
+	transcribe(cancellation: CancellationToken): Event<string>;
+}
+
+// TODO@voice
+// - load `navigator.mediaDevices.getUserMedia` lazily on startup? or would it trigger a permission prompt?
+// - pass `echoCancellation: false`, `autoGainControl: true`, `noiseSuppression: true` as audio constraints
+//   to `getUserMedia`? (they used to be passed to `AudioContext`, where they have no effect)
+// - how to prevent data processing accumulation when processing is slow?
+// - how to make this a singleton service that enables ref-counting on multiple callers?
+// - cancellation should flow to the shared process
+// - voice module should directly transcribe the PCM32 data
+// - we should transfer the Float32Array directly without serialisation overhead
+
+/**
+ * Records audio from the microphone with a `MediaRecorder`, decodes the
+ * recording to 16 kHz PCM and sends it to the `IVoiceRecognitionService`
+ * for transcription.
+ */
+export class WorkbenchVoiceRecognitionService implements IWorkbenchVoiceRecognitionService {
+
+	declare readonly _serviceBrand: undefined;
+
+	/** Value passed to `MediaRecorder.start()` as time slice (milliseconds). */
+	private static readonly AUDIO_TIME_SLICE = 2000;
+	private static readonly AUDIO_MIME_TYPE = 'audio/webm;codecs=opus';
+
+	/** Format that `IAudioBuffer` requires. */
+	private static readonly AUDIO_SAMPLE_RATE = 16000;
+	private static readonly AUDIO_CHANNEL_COUNT = 1;
+
+	constructor(
+		@IVoiceRecognitionService private readonly voiceRecognitionService: IVoiceRecognitionService,
+		@IProgressService private readonly progressService: IProgressService
+	) { }
+
+	/**
+	 * Starts recording and transcribing. The emitter behind the returned
+	 * event is disposed as soon as `cancellation` is cancelled.
+	 */
+	transcribe(cancellation: CancellationToken): Event<string> {
+		const cts = new CancellationTokenSource(cancellation);
+		const emitter = new Emitter<string>();
+		cancellation.onCancellationRequested(() => emitter.dispose());
+
+		this.doTranscribe(emitter, cts.token);
+
+		return emitter.event;
+	}
+
+	/**
+	 * Acquires the microphone and records from it until `token` is
+	 * cancelled, reporting progress in the window while doing so. The
+	 * microphone is released on every exit path.
+	 */
+	private async doTranscribe(emitter: Emitter<string>, token: CancellationToken): Promise<void> {
+		return this.progressService.withProgress({
+			location: ProgressLocation.Window,
+			title: localize('voiceTranscription', "Voice Transcription"),
+		}, async progress => {
+			const recordingDone = new DeferredPromise<void>();
+
+			progress.report({ message: localize('voiceTranscriptionGettingReady', "Getting microphone ready...") });
+
+			const audioDevice = await navigator.mediaDevices.getUserMedia({ audio: true });
+			try {
+				if (token.isCancellationRequested) {
+					return;
+				}
+
+				const audioRecorder = new MediaRecorder(audioDevice, { mimeType: WorkbenchVoiceRecognitionService.AUDIO_MIME_TYPE });
+				audioRecorder.start(WorkbenchVoiceRecognitionService.AUDIO_TIME_SLICE);
+
+				token.onCancellationRequested(() => {
+					// `stop()` throws when the recorder is already inactive
+					if (audioRecorder.state !== 'inactive') {
+						audioRecorder.stop();
+					}
+					recordingDone.complete();
+				});
+
+				progress.report({ message: localize('voiceTranscriptionRecording', "Recording from microphone...") });
+
+				const chunks: Blob[] = [];
+				audioRecorder.ondataavailable = e => {
+					chunks.push(e.data);
+
+					this.doTranscribeChunk(chunks, emitter, token);
+				};
+
+				await recordingDone.p;
+			} finally {
+				// Stopping the recorder does not release the device: the
+				// tracks of the stream have to be stopped explicitly.
+				for (const track of audioDevice.getTracks()) {
+					track.stop();
+				}
+			}
+		});
+	}
+
+	/**
+	 * Decodes everything recorded so far (all of `chunks`, not only the
+	 * latest slice), has it transcribed and fires the resulting text
+	 * through `emitter` unless `token` was cancelled in the meantime.
+	 */
+	private async doTranscribeChunk(chunks: Blob[], emitter: Emitter<string>, token: CancellationToken): Promise<void> {
+		if (token.isCancellationRequested) {
+			return;
+		}
+
+		const blob = new Blob(chunks);
+		const blobBuffer = await blob.arrayBuffer();
+		if (token.isCancellationRequested) {
+			return;
+		}
+
+		// Decoding through a context with the target sample rate yields
+		// audio data in that sample rate.
+		const context = new AudioContext({ sampleRate: WorkbenchVoiceRecognitionService.AUDIO_SAMPLE_RATE });
+
+		let audioBuffer: AudioBuffer;
+		try {
+			audioBuffer = await context.decodeAudioData(blobBuffer);
+		} finally {
+			// A context is created for every chunk, close it again so
+			// that they do not pile up while recording.
+			await context.close();
+		}
+
+		if (token.isCancellationRequested) {
+			return;
+		}
+
+		const text = await this.voiceRecognitionService.transcribe({
+			sampleRate: WorkbenchVoiceRecognitionService.AUDIO_SAMPLE_RATE,
+			channelCount: WorkbenchVoiceRecognitionService.AUDIO_CHANNEL_COUNT,
+			length: audioBuffer.length,
+			channelData: VSFloat32Array.wrap(audioBuffer.getChannelData(0))
+		});
+
+		if (token.isCancellationRequested) {
+			return;
+		}
+
+		emitter.fire(text);
+	}
+}
+
+// Register Service
+registerSingleton(IWorkbenchVoiceRecognitionService, WorkbenchVoiceRecognitionService, InstantiationType.Delayed);
diff --git a/src/vs/workbench/workbench.desktop.main.ts b/src/vs/workbench/workbench.desktop.main.ts
index e943de4afff..b3367ac6ea2 100644
--- a/src/vs/workbench/workbench.desktop.main.ts
+++ b/src/vs/workbench/workbench.desktop.main.ts
@@ -77,6 +77,8 @@ import 'vs/workbench/services/environment/electron-sandbox/shellEnvironmentServi
 import 'vs/workbench/services/integrity/electron-sandbox/integrityService';
 import 'vs/workbench/services/workingCopy/electron-sandbox/workingCopyBackupService';
 import 'vs/workbench/services/checksum/electron-sandbox/checksumService';
+import 'vs/workbench/services/voiceRecognition/electron-sandbox/voiceRecognitionService';
+import 'vs/workbench/services/voiceRecognition/electron-sandbox/workbenchVoiceRecognitionService';
import 'vs/platform/remote/electron-sandbox/sharedProcessTunnelService'; import 'vs/workbench/services/tunnel/electron-sandbox/tunnelService'; import 'vs/platform/diagnostics/electron-sandbox/diagnosticsService'; @@ -169,6 +171,9 @@ import 'vs/workbench/contrib/mergeEditor/electron-sandbox/mergeEditor.contributi // Remote Tunnel import 'vs/workbench/contrib/remoteTunnel/electron-sandbox/remoteTunnel.contribution'; +// Chat +import 'vs/workbench/contrib/chat/electron-sandbox/chat.contribution'; + //#endregion