1
0
mirror of https://github.com/home-assistant/core.git synced 2026-05-08 17:49:37 +01:00

Replace microVAD with Silero VAD (ggml) (#158282)

This commit is contained in:
Michael Hansen
2025-12-08 20:02:14 -06:00
committed by GitHub
parent b120ae827f
commit 079c6daa63
7 changed files with 50 additions and 21 deletions
@@ -3,8 +3,9 @@
from abc import ABC, abstractmethod
from dataclasses import dataclass
import logging
import math
from pymicro_vad import MicroVad
from pysilero_vad import SileroVoiceActivityDetector
from pyspeex_noise import AudioProcessor
from .const import BYTES_PER_CHUNK
@@ -42,8 +43,8 @@ class AudioEnhancer(ABC):
"""Enhance chunk of PCM audio @ 16Khz with 16-bit mono samples."""
class MicroVadSpeexEnhancer(AudioEnhancer):
"""Audio enhancer that runs microVAD and speex."""
class SileroVadSpeexEnhancer(AudioEnhancer):
"""Audio enhancer that runs Silero VAD and speex."""
def __init__(
self, auto_gain: int, noise_suppression: int, is_vad_enabled: bool
@@ -69,21 +70,49 @@ class MicroVadSpeexEnhancer(AudioEnhancer):
self.noise_suppression,
)
self.vad: MicroVad | None = None
self.vad: SileroVoiceActivityDetector | None = None
# We get 10ms chunks but Silero works on 32ms chunks, so we have to
# buffer audio. The previous speech probability is used until enough
# audio has been buffered.
self._vad_buffer: bytearray | None = None
self._vad_buffer_chunks = 0
self._vad_buffer_chunk_idx = 0
self._last_speech_probability: float | None = None
if self.is_vad_enabled:
self.vad = MicroVad()
_LOGGER.debug("Initialized microVAD")
self.vad = SileroVoiceActivityDetector()
# VAD buffer is a multiple of 10ms, but Silero VAD needs 32ms.
self._vad_buffer_chunks = int(
math.ceil(self.vad.chunk_bytes() / BYTES_PER_CHUNK)
)
self._vad_leftover_bytes = self.vad.chunk_bytes() - BYTES_PER_CHUNK
self._vad_buffer = bytearray(self.vad.chunk_bytes())
_LOGGER.debug("Initialized Silero VAD")
def enhance_chunk(self, audio: bytes, timestamp_ms: int) -> EnhancedAudioChunk:
"""Enhance 10ms chunk of PCM audio @ 16Khz with 16-bit mono samples."""
speech_probability: float | None = None
assert len(audio) == BYTES_PER_CHUNK
if self.vad is not None:
# Run VAD
speech_probability = self.vad.Process10ms(audio)
assert self._vad_buffer is not None
start_idx = self._vad_buffer_chunk_idx * BYTES_PER_CHUNK
self._vad_buffer[start_idx : start_idx + BYTES_PER_CHUNK] = audio
self._vad_buffer_chunk_idx += 1
if self._vad_buffer_chunk_idx >= self._vad_buffer_chunks:
# We have enough data to run Silero VAD (32 ms)
self._last_speech_probability = self.vad.process_chunk(
self._vad_buffer[: self.vad.chunk_bytes()]
)
# Copy leftover audio that wasn't processed to start
self._vad_buffer[: self._vad_leftover_bytes] = self._vad_buffer[
-self._vad_leftover_bytes :
]
self._vad_buffer_chunk_idx = 0
if self.audio_processor is not None:
# Run noise suppression and auto gain
@@ -92,5 +121,5 @@ class MicroVadSpeexEnhancer(AudioEnhancer):
return EnhancedAudioChunk(
audio=audio,
timestamp_ms=timestamp_ms,
speech_probability=speech_probability,
speech_probability=self._last_speech_probability,
)
@@ -8,5 +8,5 @@
"integration_type": "system",
"iot_class": "local_push",
"quality_scale": "internal",
"requirements": ["pymicro-vad==1.0.1", "pyspeex-noise==1.0.2"]
"requirements": ["pysilero-vad==3.0.0", "pyspeex-noise==1.0.2"]
}
@@ -55,7 +55,7 @@ from homeassistant.util import (
from homeassistant.util.hass_dict import HassKey
from homeassistant.util.limited_size_dict import LimitedSizeDict
from .audio_enhancer import AudioEnhancer, EnhancedAudioChunk, MicroVadSpeexEnhancer
from .audio_enhancer import AudioEnhancer, EnhancedAudioChunk, SileroVadSpeexEnhancer
from .const import (
ACKNOWLEDGE_PATH,
BYTES_PER_CHUNK,
@@ -633,7 +633,7 @@ class PipelineRun:
# Initialize with audio settings
if self.audio_settings.needs_processor and (self.audio_enhancer is None):
# Default audio enhancer
self.audio_enhancer = MicroVadSpeexEnhancer(
self.audio_enhancer = SileroVadSpeexEnhancer(
self.audio_settings.auto_gain_dbfs,
self.audio_settings.noise_suppression_level,
self.audio_settings.is_vad_enabled,
+1 -1
View File
@@ -53,10 +53,10 @@ Pillow==12.0.0
propcache==0.4.1
psutil-home-assistant==0.0.1
PyJWT==2.10.1
pymicro-vad==1.0.1
PyNaCl==1.6.0
pyOpenSSL==25.3.0
pyserial==3.5
pysilero-vad==3.0.0
pyspeex-noise==1.0.2
python-slugify==8.0.4
PyTurboJPEG==1.8.0
+3 -3
View File
@@ -2195,9 +2195,6 @@ pymediaroom==0.6.5.4
# homeassistant.components.meteoclimatic
pymeteoclimatic==0.1.0
# homeassistant.components.assist_pipeline
pymicro-vad==1.0.1
# homeassistant.components.miele
pymiele==0.6.1
@@ -2399,6 +2396,9 @@ pysiaalarm==3.1.1
# homeassistant.components.signal_messenger
pysignalclirestapi==0.3.24
# homeassistant.components.assist_pipeline
pysilero-vad==3.0.0
# homeassistant.components.sky_hub
pyskyqhub==0.1.4
+3 -3
View File
@@ -1851,9 +1851,6 @@ pymata-express==1.19
# homeassistant.components.meteoclimatic
pymeteoclimatic==0.1.0
# homeassistant.components.assist_pipeline
pymicro-vad==1.0.1
# homeassistant.components.miele
pymiele==0.6.1
@@ -2019,6 +2016,9 @@ pysiaalarm==3.1.1
# homeassistant.components.signal_messenger
pysignalclirestapi==0.3.24
# homeassistant.components.assist_pipeline
pysilero-vad==3.0.0
# homeassistant.components.sma
pysma==1.0.2
+1 -1
View File
@@ -33,7 +33,7 @@ RUN --mount=from=ghcr.io/astral-sh/uv:0.9.6,source=/uv,target=/bin/uv \
hassil==3.5.0 \
home-assistant-intents==2025.12.2 \
mutagen==1.47.0 \
pymicro-vad==1.0.1 \
pysilero-vad==3.0.0 \
pyspeex-noise==1.0.2
LABEL "name"="hassfest"