From da800b888974258f4ed7b034642d7ecb64d75f5b Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Thu, 12 Feb 2026 09:20:23 +0100 Subject: [PATCH] Simplify HomeAssistantWebSocket and raise on connection errors (#6553) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Raise HomeAssistantWSError when Core WebSocket is unreachable Previously, async_send_command silently returned None when Home Assistant Core was not reachable, leading to misleading error messages downstream (e.g. "returned invalid response of None instead of a list of users"). Refactor _can_send to _ensure_connected which now raises HomeAssistantWSError on connection failures while still returning False for silent-skip cases (shutdown, unsupported version). async_send_message catches the exception to preserve fire-and-forget behavior. Update callers that don't handle HomeAssistantWSError: _hardware_events and addon auto-update in tasks. Co-Authored-By: Claude Opus 4.6 * Simplify HomeAssistantWebSocket command/message distinction The WebSocket layer had a confusing split between "messages" (fire-and-forget) and "commands" (request/response) that didn't reflect Home Assistant Core's architecture where everything is just a WS command. - Remove dead WSClient.async_send_message (never called) - Rename async_send_message → _async_send_command (private, fire-and-forget) - Rename send_message → send_command (sync wrapper) - Simplify _ensure_connected: drop message param, always raise on failure - Simplify async_send_command: always raise on connection errors - Remove MIN_VERSION gating (minimum supported Core is now 2024.2+) - Remove begin_backup/end_backup version guards for Core < 2022.1.0 - Add debug logging for silently ignored connection errors Co-Authored-By: Claude Opus 4.6 * Wait for Core to come up before backup This is crucial since the WebSocket command to Core now fails with the new error handling if Core is not running yet. * Wait for Core install job instead * Use CLI to fetch jobs instead of Supervisor API The Supervisor API needs authentication token, which we have not available at this point in the workflow. Instead of fetching the token, we can use the CLI, which is available in the container. --------- Co-authored-by: Claude Opus 4.6 --- .github/workflows/builder.yml | 29 ++++++ supervisor/homeassistant/module.py | 20 +++-- supervisor/homeassistant/websocket.py | 99 +++++++++------------ supervisor/misc/tasks.py | 9 +- tests/api/test_backups.py | 4 +- tests/homeassistant/test_module.py | 6 +- tests/homeassistant/test_websocket.py | 68 ++++++++------ tests/host/test_connectivity.py | 2 +- tests/resolution/test_resolution_manager.py | 6 +- 9 files changed, 142 insertions(+), 101 deletions(-) diff --git a/.github/workflows/builder.yml b/.github/workflows/builder.yml index 46ee3e687..fb4425c3e 100644 --- a/.github/workflows/builder.yml +++ b/.github/workflows/builder.yml @@ -323,6 +323,35 @@ jobs: docker logs --tail 50 hassio_supervisor exit 1 + # Wait for Core to come up so subsequent steps (backup, addon install) succeed. + # On first startup, Supervisor installs Core via the "home_assistant_core_install" + # job (which pulls the image and then starts Core). Jobs with cleanup=True are + # removed from the jobs list once done, so we poll until it's gone. + - name: Wait for Core to be started + run: | + echo "Waiting for Home Assistant Core to be installed and started..." + timeout=300 + elapsed=0 + + while [ $elapsed -lt $timeout ]; do + jobs=$(docker exec hassio_cli ha jobs info --no-progress --raw-json | jq -r '.data.jobs[] | select(.name == "home_assistant_core_install" and .done == false) | .name' 2>/dev/null) + if [ -z "$jobs" ]; then + echo "Home Assistant Core install/start complete (took ${elapsed}s)" + exit 0 + fi + + if [ $((elapsed % 15)) -eq 0 ]; then + echo "Core still installing... (${elapsed}s/${timeout}s)" + fi + + sleep 5 + elapsed=$((elapsed + 5)) + done + + echo "ERROR: Home Assistant Core failed to install/start within ${timeout}s" + docker logs --tail 50 hassio_supervisor + exit 1 + - name: Check the Supervisor run: | echo "Checking supervisor info" diff --git a/supervisor/homeassistant/module.py b/supervisor/homeassistant/module.py index f2e35efd0..b8f51e205 100644 --- a/supervisor/homeassistant/module.py +++ b/supervisor/homeassistant/module.py @@ -360,15 +360,23 @@ class HomeAssistant(FileConfiguration, CoreSysAttributes): ): return - configuration: ( - dict[str, Any] | None - ) = await self.sys_homeassistant.websocket.async_send_command( - {ATTR_TYPE: "get_config"} - ) + try: + configuration: ( + dict[str, Any] | None + ) = await self.sys_homeassistant.websocket.async_send_command( + {ATTR_TYPE: "get_config"} + ) + except HomeAssistantWSError as err: + _LOGGER.warning( + "Can't get Home Assistant Core configuration: %s. Not sending hardware events to Home Assistant Core.", + err, + ) + return + if not configuration or "usb" not in configuration.get("components", []): return - self.sys_homeassistant.websocket.send_message({ATTR_TYPE: "usb/scan"}) + self.sys_homeassistant.websocket.send_command({ATTR_TYPE: "usb/scan"}) @Job(name="home_assistant_module_begin_backup") async def begin_backup(self) -> None: diff --git a/supervisor/homeassistant/websocket.py b/supervisor/homeassistant/websocket.py index 719efa733..21b200e7e 100644 --- a/supervisor/homeassistant/websocket.py +++ b/supervisor/homeassistant/websocket.py @@ -30,12 +30,6 @@ from ..exceptions import ( from ..utils.json import json_dumps from .const import CLOSING_STATES, WSEvent, WSType -MIN_VERSION = { - WSType.SUPERVISOR_EVENT: "2021.2.4", - WSType.BACKUP_START: "2022.1.0", - WSType.BACKUP_END: "2022.1.0", -} - _LOGGER: logging.Logger = logging.getLogger(__name__) T = TypeVar("T") @@ -71,15 +65,6 @@ class WSClient: if not self._client.closed: await self._client.close() - async def async_send_message(self, message: dict[str, Any]) -> None: - """Send a websocket message, don't wait for response.""" - self._message_id += 1 - _LOGGER.debug("Sending: %s", message) - try: - await self._client.send_json(message, dumps=json_dumps) - except ConnectionError as err: - raise HomeAssistantWSConnectionError(str(err)) from err - async def async_send_command(self, message: dict[str, Any]) -> T | None: """Send a websocket message, and return the response.""" self._message_id += 1 @@ -191,7 +176,7 @@ class HomeAssistantWebSocket(CoreSysAttributes): """Process queue once supervisor is running.""" if reference == CoreState.RUNNING: for msg in self._queue: - await self.async_send_message(msg) + await self._async_send_command(msg) self._queue.clear() @@ -212,38 +197,26 @@ class HomeAssistantWebSocket(CoreSysAttributes): self.sys_create_task(client.start_listener()) return client - async def _can_send(self, message: dict[str, Any]) -> bool: - """Determine if we can use WebSocket messages.""" + async def _ensure_connected(self) -> None: + """Ensure WebSocket connection is ready. + + Raises HomeAssistantWSError if unable to connect. + """ if self.sys_core.state in CLOSING_STATES: - return False + raise HomeAssistantWSError( + "WebSocket not available, system is shutting down" + ) connected = self._client and self._client.connected # If we are already connected, we can avoid the check_api_state call # since it makes a new socket connection and we already have one. if not connected and not await self.sys_homeassistant.api.check_api_state(): - # No core access, don't try. - return False - - if not self._client: - self._client = await self._get_ws_client() - - if not self._client.connected: - self._client = await self._get_ws_client() - - message_type = message.get("type") - - if ( - message_type is not None - and message_type in MIN_VERSION - and self._client.ha_version < MIN_VERSION[message_type] - ): - _LOGGER.info( - "WebSocket command %s is not supported until core-%s. Ignoring WebSocket message.", - message_type, - MIN_VERSION[message_type], + raise HomeAssistantWSError( + "Can't connect to Home Assistant Core WebSocket, the API is not reachable" ) - return False - return True + + if not self._client or not self._client.connected: + self._client = await self._get_ws_client() async def load(self) -> None: """Set up queue processor after startup completes.""" @@ -251,53 +224,61 @@ class HomeAssistantWebSocket(CoreSysAttributes): BusEvent.SUPERVISOR_STATE_CHANGE, self._process_queue ) - async def async_send_message(self, message: dict[str, Any]) -> None: - """Send a message with the WS client.""" - # Only commands allowed during startup as those tell Home Assistant to do something. - # Messages may cause clients to make follow-up API calls so those wait. + async def _async_send_command(self, message: dict[str, Any]) -> None: + """Send a fire-and-forget command via WebSocket. + + Queues messages during startup. Silently handles connection errors. + """ if self.sys_core.state in STARTING_STATES: self._queue.append(message) _LOGGER.debug("Queuing message until startup has completed: %s", message) return - if not await self._can_send(message): + try: + await self._ensure_connected() + except HomeAssistantWSError as err: + _LOGGER.debug("Can't send WebSocket command: %s", err) return + # _ensure_connected guarantees self._client is set + assert self._client + try: - if self._client: - await self._client.async_send_command(message) - except HomeAssistantWSConnectionError: + await self._client.async_send_command(message) + except HomeAssistantWSConnectionError as err: + _LOGGER.debug("Fire-and-forget WebSocket command failed: %s", err) if self._client: await self._client.close() self._client = None async def async_send_command(self, message: dict[str, Any]) -> T | None: - """Send a command with the WS client and wait for the response.""" - if not await self._can_send(message): - return None + """Send a command and return the response. + Raises HomeAssistantWSError if unable to connect to Home Assistant Core. + """ + await self._ensure_connected() + # _ensure_connected guarantees self._client is set + assert self._client try: - if self._client: - return await self._client.async_send_command(message) + return await self._client.async_send_command(message) except HomeAssistantWSConnectionError: if self._client: await self._client.close() self._client = None raise - return None - def send_message(self, message: dict[str, Any]) -> None: - """Send a supervisor/event message.""" + def send_command(self, message: dict[str, Any]) -> None: + """Send a fire-and-forget command via WebSocket.""" if self.sys_core.state in CLOSING_STATES: return - self.sys_create_task(self.async_send_message(message)) + self.sys_create_task(self._async_send_command(message)) async def async_supervisor_event_custom( self, event: WSEvent, extra_data: dict[str, Any] | None = None ) -> None: """Send a supervisor/event message to Home Assistant with custom data.""" try: - await self.async_send_message( + await self._async_send_command( { ATTR_TYPE: WSType.SUPERVISOR_EVENT, ATTR_DATA: { diff --git a/supervisor/misc/tasks.py b/supervisor/misc/tasks.py index ef529efcc..90440a25b 100644 --- a/supervisor/misc/tasks.py +++ b/supervisor/misc/tasks.py @@ -13,6 +13,7 @@ from ..exceptions import ( AddonsError, BackupFileNotFoundError, HomeAssistantError, + HomeAssistantWSError, ObserverError, SupervisorUpdateError, ) @@ -152,7 +153,13 @@ class Tasks(CoreSysAttributes): "Sending update add-on WebSocket command to Home Assistant Core: %s", message, ) - await self.sys_homeassistant.websocket.async_send_command(message) + try: + await self.sys_homeassistant.websocket.async_send_command(message) + except HomeAssistantWSError as err: + _LOGGER.warning( + "Could not send add-on update command to Home Assistant Core: %s", + err, + ) @Job( name="tasks_update_supervisor", diff --git a/tests/api/test_backups.py b/tests/api/test_backups.py index 9370679d9..59827c8aa 100644 --- a/tests/api/test_backups.py +++ b/tests/api/test_backups.py @@ -1186,7 +1186,9 @@ async def test_restore_homeassistant_adds_env( with ( patch.object(HomeAssistantCore, "_block_till_run"), patch.object( - HomeAssistantWebSocket, "async_send_message", new=mock_async_send_message + HomeAssistantWebSocket, + "_async_send_command", + new=mock_async_send_message, ), ): resp = await api_client.post( diff --git a/tests/homeassistant/test_module.py b/tests/homeassistant/test_module.py index c813c59b1..5459899e3 100644 --- a/tests/homeassistant/test_module.py +++ b/tests/homeassistant/test_module.py @@ -50,7 +50,7 @@ async def test_load( assert coresys.homeassistant.secrets.secrets == {"hello": "world"} await coresys.core.set_state(CoreState.SETUP) - await coresys.homeassistant.websocket.async_send_message({"lorem": "ipsum"}) + await coresys.homeassistant.websocket._async_send_command({"lorem": "ipsum"}) ha_ws_client.async_send_command.assert_not_called() await coresys.core.set_state(CoreState.RUNNING) @@ -93,7 +93,7 @@ async def test_begin_backup_ws_error(coresys: CoreSys): HomeAssistantWSConnectionError("Connection was closed") ) with ( - patch.object(HomeAssistantWebSocket, "_can_send", return_value=True), + patch.object(HomeAssistantWebSocket, "_ensure_connected", return_value=None), pytest.raises( HomeAssistantBackupError, match="Preparing backup of Home Assistant Core failed. Failed to inform HA Core: Connection was closed.", @@ -108,7 +108,7 @@ async def test_end_backup_ws_error(coresys: CoreSys, caplog: pytest.LogCaptureFi coresys.homeassistant.websocket._client.async_send_command.side_effect = ( HomeAssistantWSConnectionError("Connection was closed") ) - with patch.object(HomeAssistantWebSocket, "_can_send", return_value=True): + with patch.object(HomeAssistantWebSocket, "_ensure_connected", return_value=None): await coresys.homeassistant.end_backup() assert ( diff --git a/tests/homeassistant/test_websocket.py b/tests/homeassistant/test_websocket.py index 8573b9c9c..2fc2d6c04 100644 --- a/tests/homeassistant/test_websocket.py +++ b/tests/homeassistant/test_websocket.py @@ -2,18 +2,18 @@ # pylint: disable=import-error import asyncio -import logging -from unittest.mock import AsyncMock +from unittest.mock import AsyncMock, patch -from awesomeversion import AwesomeVersion +import pytest from supervisor.const import CoreState from supervisor.coresys import CoreSys +from supervisor.exceptions import HomeAssistantWSError from supervisor.homeassistant.const import WSEvent, WSType async def test_send_command(coresys: CoreSys, ha_ws_client: AsyncMock): - """Test websocket error on listen.""" + """Test sending a command returns a response.""" await coresys.homeassistant.websocket.async_send_command({"type": "test"}) ha_ws_client.async_send_command.assert_called_with({"type": "test"}) @@ -32,30 +32,10 @@ async def test_send_command(coresys: CoreSys, ha_ws_client: AsyncMock): ) -async def test_send_command_old_core_version( - coresys: CoreSys, ha_ws_client: AsyncMock, caplog +async def test_fire_and_forget_during_startup( + coresys: CoreSys, ha_ws_client: AsyncMock ): - """Test websocket error on listen.""" - caplog.set_level(logging.INFO) - ha_ws_client.ha_version = AwesomeVersion("1970.1.1") - - await coresys.homeassistant.websocket.async_send_command( - {"type": "supervisor/event"} - ) - - assert ( - "WebSocket command supervisor/event is not supported until core-2021.2.4" - in caplog.text - ) - - await coresys.homeassistant.websocket.async_supervisor_update_event( - "test", {"lorem": "ipsum"} - ) - ha_ws_client.async_send_command.assert_not_called() - - -async def test_send_message_during_startup(coresys: CoreSys, ha_ws_client: AsyncMock): - """Test websocket messages queue during startup.""" + """Test fire-and-forget commands queue during startup and replay when running.""" await coresys.homeassistant.websocket.load() await coresys.core.set_state(CoreState.SETUP) @@ -92,3 +72,37 @@ async def test_send_message_during_startup(coresys: CoreSys, ha_ws_client: Async "test", {"lorem": "ipsum"} ) ha_ws_client.async_send_command.assert_not_called() + + +async def test_send_command_core_not_reachable( + coresys: CoreSys, ha_ws_client: AsyncMock +): + """Test async_send_command raises when Core API is not reachable.""" + ha_ws_client.connected = False + with ( + patch.object(coresys.homeassistant.api, "check_api_state", return_value=False), + pytest.raises(HomeAssistantWSError, match="not reachable"), + ): + await coresys.homeassistant.websocket.async_send_command({"type": "test"}) + + ha_ws_client.async_send_command.assert_not_called() + + +async def test_fire_and_forget_core_not_reachable( + coresys: CoreSys, ha_ws_client: AsyncMock +): + """Test fire-and-forget command silently skips when Core API is not reachable.""" + ha_ws_client.connected = False + with patch.object(coresys.homeassistant.api, "check_api_state", return_value=False): + await coresys.homeassistant.websocket._async_send_command({"type": "test"}) + + ha_ws_client.async_send_command.assert_not_called() + + +async def test_send_command_during_shutdown(coresys: CoreSys, ha_ws_client: AsyncMock): + """Test async_send_command raises during shutdown.""" + await coresys.core.set_state(CoreState.SHUTDOWN) + with pytest.raises(HomeAssistantWSError, match="shutting down"): + await coresys.homeassistant.websocket.async_send_command({"type": "test"}) + + ha_ws_client.async_send_command.assert_not_called() diff --git a/tests/host/test_connectivity.py b/tests/host/test_connectivity.py index ad186f2f1..bd348b26e 100644 --- a/tests/host/test_connectivity.py +++ b/tests/host/test_connectivity.py @@ -49,7 +49,7 @@ async def test_connectivity_events(coresys: CoreSys, force: bool): await asyncio.sleep(0) with patch.object( - type(coresys.homeassistant.websocket), "async_send_message" + type(coresys.homeassistant.websocket), "_async_send_command" ) as send_message: await coresys.host.network.check_connectivity(force=force) await asyncio.sleep(0) diff --git a/tests/resolution/test_resolution_manager.py b/tests/resolution/test_resolution_manager.py index 946d9893c..4358b4339 100644 --- a/tests/resolution/test_resolution_manager.py +++ b/tests/resolution/test_resolution_manager.py @@ -312,7 +312,7 @@ async def test_resolution_apply_suggestion_multiple_copies(coresys: CoreSys): async def test_events_on_unsupported_changed(coresys: CoreSys): """Test events fired when unsupported changes.""" with patch.object( - type(coresys.homeassistant.websocket), "async_send_message" + type(coresys.homeassistant.websocket), "_async_send_command" ) as send_message: # Marking system as unsupported tells HA assert coresys.resolution.unsupported == [] @@ -376,7 +376,7 @@ async def test_events_on_unsupported_changed(coresys: CoreSys): async def test_events_on_unhealthy_changed(coresys: CoreSys): """Test events fired when unhealthy changes.""" with patch.object( - type(coresys.homeassistant.websocket), "async_send_message" + type(coresys.homeassistant.websocket), "_async_send_command" ) as send_message: # Marking system as unhealthy tells HA assert coresys.resolution.unhealthy == [] @@ -415,7 +415,7 @@ async def test_events_on_unhealthy_changed(coresys: CoreSys): async def test_dismiss_issue_removes_orphaned_suggestions(coresys: CoreSys): """Test dismissing an issue also removes any suggestions which have been orphaned.""" with patch.object( - type(coresys.homeassistant.websocket), "async_send_message" + type(coresys.homeassistant.websocket), "_async_send_command" ) as send_message: coresys.resolution.create_issue( IssueType.MOUNT_FAILED,