diff --git a/build/gulpfile.vscode.js b/build/gulpfile.vscode.js index cd0a762dfa7..5f7f5ce4470 100644 --- a/build/gulpfile.vscode.js +++ b/build/gulpfile.vscode.js @@ -72,7 +72,7 @@ const vscodeResources = [ 'out-build/vs/workbench/contrib/terminal/browser/media/*.sh', 'out-build/vs/workbench/contrib/terminal/browser/media/*.zsh', 'out-build/vs/workbench/contrib/webview/browser/pre/*.js', - 'out-build/vs/workbench/services/voiceRecognition/electron-sandbox/bufferedVoiceTranscriber.js', + 'out-build/vs/workbench/services/voiceRecognition/electron-sandbox/voiceTranscriptionWorklet.js', 'out-build/vs/**/markdown.css', 'out-build/vs/workbench/contrib/tasks/**/*.json', '!**/test/**' diff --git a/src/vs/code/node/sharedProcess/contrib/voiceTranscriber.ts b/src/vs/code/node/sharedProcess/contrib/voiceTranscriber.ts index e195f2b8b7a..48a7843730a 100644 --- a/src/vs/code/node/sharedProcess/contrib/voiceTranscriber.ts +++ b/src/vs/code/node/sharedProcess/contrib/voiceTranscriber.ts @@ -7,12 +7,15 @@ import { Event } from 'vs/base/common/event'; import { MessagePortMain, MessageEvent } from 'vs/base/parts/sandbox/node/electronTypes'; import { Disposable, toDisposable } from 'vs/base/common/lifecycle'; import { IVoiceRecognitionService } from 'vs/platform/voiceRecognition/common/voiceRecognitionService'; +import { ILogService } from 'vs/platform/log/common/log'; +import { CancellationTokenSource } from 'vs/base/common/cancellation'; export class VoiceTranscriber extends Disposable { constructor( private readonly onDidWindowConnectRaw: Event, @IVoiceRecognitionService private readonly voiceRecognitionService: IVoiceRecognitionService, + @ILogService private readonly logService: ILogService ) { super(); @@ -21,12 +24,17 @@ export class VoiceTranscriber extends Disposable { private registerListeners(): void { this._register(this.onDidWindowConnectRaw(port => { + this.logService.info(`[voice] transcriber: new connection`); + + const cts = new CancellationTokenSource(); + this._register(toDisposable(() => cts.dispose(true))); + const portHandler = async (e: MessageEvent) => { if (!(e.data instanceof Float32Array)) { return; } - const result = await this.voiceRecognitionService.transcribe(e.data); + const result = await this.voiceRecognitionService.transcribe(e.data, cts.token); port.postMessage(result); }; @@ -36,6 +44,12 @@ export class VoiceTranscriber extends Disposable { port.start(); this._register(toDisposable(() => port.close())); + + port.on('close', () => { + this.logService.info(`[voice] transcriber: closed connection`); + + cts.dispose(true); + }); })); } } diff --git a/src/vs/platform/voiceRecognition/common/voiceRecognitionService.ts b/src/vs/platform/voiceRecognition/common/voiceRecognitionService.ts index a563a2e45cd..ca52963092f 100644 --- a/src/vs/platform/voiceRecognition/common/voiceRecognitionService.ts +++ b/src/vs/platform/voiceRecognition/common/voiceRecognitionService.ts @@ -3,6 +3,7 @@ * Licensed under the MIT License. See License.txt in the project root for license information. *--------------------------------------------------------------------------------------------*/ +import { CancellationToken } from 'vs/base/common/cancellation'; import { createDecorator } from 'vs/platform/instantiation/common/instantiation'; export const IVoiceRecognitionService = createDecorator('voiceRecognitionService'); @@ -21,5 +22,5 @@ export interface IVoiceRecognitionService { * - 16khz sampling rate * - 16bit sample size */ - transcribe(channelData: Float32Array): Promise; + transcribe(channelData: Float32Array, cancellation: CancellationToken): Promise; } diff --git a/src/vs/platform/voiceRecognition/node/voiceRecognitionService.ts b/src/vs/platform/voiceRecognition/node/voiceRecognitionService.ts index a67fb2c54b4..f1e259b76ad 100644 --- a/src/vs/platform/voiceRecognition/node/voiceRecognitionService.ts +++ b/src/vs/platform/voiceRecognition/node/voiceRecognitionService.ts @@ -3,6 +3,7 @@ * Licensed under the MIT License. See License.txt in the project root for license information. *--------------------------------------------------------------------------------------------*/ +import { CancellationToken } from 'vs/base/common/cancellation'; import { ILogService } from 'vs/platform/log/common/log'; import { IVoiceRecognitionService } from 'vs/platform/voiceRecognition/common/voiceRecognitionService'; @@ -14,7 +15,7 @@ export class VoiceRecognitionService implements IVoiceRecognitionService { @ILogService private readonly logService: ILogService ) { } - async transcribe(channelData: Float32Array): Promise { + async transcribe(channelData: Float32Array, cancellation: CancellationToken): Promise { this.logService.info(`[voice] transcribe(${channelData.length}): Begin`); const modulePath = process.env.VSCODE_VOICE_MODULE_PATH; @@ -23,7 +24,6 @@ export class VoiceRecognitionService implements IVoiceRecognitionService { } const now = Date.now(); - const conversionTime = Date.now() - now; const voiceModule: { transcribe: (audioBuffer: { channelCount: 1; sampleRate: 16000; sampleSize: 16; channelData: Float32Array }, options: { language: string | 'auto'; suppressNonSpeechTokens: boolean }) => Promise } = require.__$__nodeRequire(modulePath); @@ -37,7 +37,7 @@ export class VoiceRecognitionService implements IVoiceRecognitionService { suppressNonSpeechTokens: true }); - this.logService.info(`[voice] transcribe(${channelData.length}): End (text: "${text}", took: ${Date.now() - now}ms total, ${conversionTime}ms uint8->float32 conversion)`); + this.logService.info(`[voice] transcribe(${channelData.length}): End (text: "${text}", took: ${Date.now() - now}ms)`); return text; } diff --git a/src/vs/workbench/services/voiceRecognition/electron-sandbox/bufferedVoiceTranscriber.ts b/src/vs/workbench/services/voiceRecognition/electron-sandbox/voiceTranscriptionWorklet.ts similarity index 68% rename from src/vs/workbench/services/voiceRecognition/electron-sandbox/bufferedVoiceTranscriber.ts rename to src/vs/workbench/services/voiceRecognition/electron-sandbox/voiceTranscriptionWorklet.ts index bb6c8eaaa17..d6edf688d1f 100644 --- a/src/vs/workbench/services/voiceRecognition/electron-sandbox/bufferedVoiceTranscriber.ts +++ b/src/vs/workbench/services/voiceRecognition/electron-sandbox/voiceTranscriptionWorklet.ts @@ -10,7 +10,7 @@ declare class AudioWorkletProcessor { process(inputs: [Float32Array[]], outputs: [Float32Array[]]): boolean; } -class BufferedVoiceTranscriber extends AudioWorkletProcessor { +class VoiceTranscriptionWorklet extends AudioWorkletProcessor { private static readonly BUFFER_TIMESPAN = 2000; @@ -21,6 +21,8 @@ class BufferedVoiceTranscriber extends AudioWorkletProcessor { private sharedProcessConnection: MessagePort | undefined = undefined; + private stopped: boolean = false; + constructor() { super(); @@ -29,16 +31,32 @@ class BufferedVoiceTranscriber extends AudioWorkletProcessor { private registerListeners() { this.port.onmessage = event => { - if (event.data === 'vscode:transferSharedProcessConnection') { - this.sharedProcessConnection = event.ports[0]; + switch (event.data) { + case 'vscode:startVoiceTranscription': { + this.sharedProcessConnection = event.ports[0]; - this.sharedProcessConnection.onmessage = event => { - if (typeof event.data === 'string') { - this.port.postMessage(event.data); - } - }; + this.sharedProcessConnection.onmessage = event => { + if (this.stopped) { + return; + } - this.sharedProcessConnection.start(); + if (typeof event.data === 'string') { + this.port.postMessage(event.data); + } + }; + + this.sharedProcessConnection.start(); + break; + } + + case 'vscode:stopVoiceTranscription': { + this.stopped = true; + + this.sharedProcessConnection?.close(); + this.sharedProcessConnection = undefined; + + break; + } } }; } @@ -50,12 +68,12 @@ class BufferedVoiceTranscriber extends AudioWorkletProcessor { const inputChannelData = inputs[0][0]; if ((!(inputChannelData instanceof Float32Array))) { - return true; + return !this.stopped; } this.currentInputFloat32Arrays.push(inputChannelData.slice(0)); - if (Date.now() - this.startTime > BufferedVoiceTranscriber.BUFFER_TIMESPAN && this.sharedProcessConnection) { + if (Date.now() - this.startTime > VoiceTranscriptionWorklet.BUFFER_TIMESPAN && this.sharedProcessConnection) { const currentInputFloat32Arrays = this.currentInputFloat32Arrays; this.currentInputFloat32Arrays = []; @@ -66,7 +84,7 @@ class BufferedVoiceTranscriber extends AudioWorkletProcessor { this.startTime = Date.now(); } - return true; + return !this.stopped; } private joinFloat32Arrays(float32Arrays: Float32Array[]): Float32Array { @@ -83,4 +101,4 @@ class BufferedVoiceTranscriber extends AudioWorkletProcessor { } // @ts-ignore -registerProcessor('buffered-voice-transcriber', BufferedVoiceTranscriber); +registerProcessor('voice-transcription-worklet', VoiceTranscriptionWorklet); diff --git a/src/vs/workbench/services/voiceRecognition/electron-sandbox/workbenchVoiceRecognitionService.ts b/src/vs/workbench/services/voiceRecognition/electron-sandbox/workbenchVoiceRecognitionService.ts index 329b340651b..a676ed1aff5 100644 --- a/src/vs/workbench/services/voiceRecognition/electron-sandbox/workbenchVoiceRecognitionService.ts +++ b/src/vs/workbench/services/voiceRecognition/electron-sandbox/workbenchVoiceRecognitionService.ts @@ -28,7 +28,7 @@ export interface IWorkbenchVoiceRecognitionService { transcribe(cancellation: CancellationToken): Event; } -class BufferedVoiceTranscriber extends AudioWorkletNode { +class VoiceTranscriptionWorkletNode extends AudioWorkletNode { constructor( context: BaseAudioContext, @@ -36,7 +36,7 @@ class BufferedVoiceTranscriber extends AudioWorkletNode { private readonly onDidTranscribe: Emitter, private readonly sharedProcessService: ISharedProcessService ) { - super(context, 'buffered-voice-transcriber', options); + super(context, 'voice-transcription-worklet', options); this.registerListeners(); } @@ -51,16 +51,19 @@ class BufferedVoiceTranscriber extends AudioWorkletNode { async start(token: CancellationToken): Promise { const sharedProcessConnection = await this.sharedProcessService.createRawConnection(); - token.onCancellationRequested(() => sharedProcessConnection.close()); - this.port.postMessage('vscode:transferSharedProcessConnection', [sharedProcessConnection]); + token.onCancellationRequested(() => { + this.port.postMessage('vscode:stopVoiceTranscription'); + this.disconnect(); + }); + + this.port.postMessage('vscode:startVoiceTranscription', [sharedProcessConnection]); } } // TODO@voice // - how to prevent data processing accumulation when processing is slow? // - how to make this a singleton service that enables ref-counting on multiple callers? -// - cancellation should flow to the shared process // - voice module should directly transcribe the PCM32 data without wav+file conversion export class WorkbenchVoiceRecognitionService implements IWorkbenchVoiceRecognitionService { @@ -85,8 +88,8 @@ export class WorkbenchVoiceRecognitionService implements IWorkbenchVoiceRecognit return onDidTranscribe.event; } - private async doTranscribe(onDidTranscribe: Emitter, token: CancellationToken): Promise { - return this.progressService.withProgress({ + private doTranscribe(onDidTranscribe: Emitter, token: CancellationToken): void { + this.progressService.withProgress({ location: ProgressLocation.Window, title: localize('voiceTranscription', "Voice Transcription"), }, async progress => { @@ -116,21 +119,32 @@ export class WorkbenchVoiceRecognitionService implements IWorkbenchVoiceRecognit const microphoneSource = audioContext.createMediaStreamSource(microphoneDevice); token.onCancellationRequested(() => { - microphoneDevice.getTracks().forEach(track => track.stop()); + for (const track of microphoneDevice.getTracks()) { + track.stop(); + } + microphoneSource.disconnect(); audioContext.close(); recordingDone.complete(); }); - await audioContext.audioWorklet.addModule(FileAccess.asBrowserUri('vs/workbench/services/voiceRecognition/electron-sandbox/bufferedVoiceTranscriber.js').toString(true)); + await audioContext.audioWorklet.addModule(FileAccess.asBrowserUri('vs/workbench/services/voiceRecognition/electron-sandbox/voiceTranscriptionWorklet.js').toString(true)); - const bufferedVoiceTranscriberTarget = new BufferedVoiceTranscriber(audioContext, { + if (token.isCancellationRequested) { + return; + } + + const voiceTranscriptionTarget = new VoiceTranscriptionWorkletNode(audioContext, { channelCount: WorkbenchVoiceRecognitionService.AUDIO_CHANNELS, channelCountMode: 'explicit' }, onDidTranscribe, this.sharedProcessService); - await bufferedVoiceTranscriberTarget.start(token); + await voiceTranscriptionTarget.start(token); - microphoneSource.connect(bufferedVoiceTranscriberTarget); + if (token.isCancellationRequested) { + return; + } + + microphoneSource.connect(voiceTranscriptionTarget); progress.report({ message: localize('voiceTranscriptionRecording', "Recording from microphone...") });