1
0
mirror of https://github.com/home-assistant/core.git synced 2026-04-17 15:44:52 +01:00

Suppress roborock failures under some unavailability threshold (#158673)

Co-authored-by: Joost Lekkerkerker <joostlek@outlook.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
Allen Porter
2025-12-13 13:30:21 -08:00
committed by GitHub
parent afb9e18a7d
commit 90c8c56a06
2 changed files with 123 additions and 22 deletions

View File

@@ -43,6 +43,12 @@ from .models import DeviceState
SCAN_INTERVAL = timedelta(seconds=30)
# Roborock devices have a known issue where they go offline for a short period
# around 3AM local time for ~1 minute and reset both the local connection
# and MQTT connection. To avoid log spam, we will avoid reporting failures refreshing
# data until this duration has passed.
MIN_UNAVAILABLE_DURATION = timedelta(minutes=2)
_LOGGER = logging.getLogger(__name__)
@@ -102,6 +108,9 @@ class RoborockDataUpdateCoordinator(DataUpdateCoordinator[DeviceState]):
# Keep track of last attempt to refresh maps/rooms to know when to try again.
self._last_home_update_attempt: datetime
self.last_home_update: datetime | None = None
# Tracks the last successful update to control when we report failure
# to the base class. This is reset on successful data update.
self._last_update_success_time: datetime | None = None
@cached_property
def dock_device_info(self) -> DeviceInfo:
@@ -169,7 +178,7 @@ class RoborockDataUpdateCoordinator(DataUpdateCoordinator[DeviceState]):
self.last_home_update = dt_util.utcnow()
async def _verify_api(self) -> None:
"""Verify that the api is reachable. If it is not, switch clients."""
"""Verify that the api is reachable."""
if self._device.is_connected:
if self._device.is_local_connected:
async_delete_issue(
@@ -217,26 +226,27 @@ class RoborockDataUpdateCoordinator(DataUpdateCoordinator[DeviceState]):
try:
# Update device props and standard api information
await self._update_device_prop()
except UpdateFailed:
if self._should_suppress_update_failure():
_LOGGER.debug(
"Suppressing update failure until unavailable duration passed"
)
return self.data
raise
# If the vacuum is currently cleaning and it has been IMAGE_CACHE_INTERVAL
# since the last map update, you can update the map.
new_status = self.properties_api.status
if (
new_status.in_cleaning
and (dt_util.utcnow() - self._last_home_update_attempt)
> IMAGE_CACHE_INTERVAL
) or self.last_update_state != new_status.state_name:
self._last_home_update_attempt = dt_util.utcnow()
try:
await self.update_map()
except HomeAssistantError as err:
_LOGGER.debug("Failed to update map: %s", err)
except RoborockException as ex:
_LOGGER.debug("Failed to update data: %s", ex)
raise UpdateFailed(
translation_domain=DOMAIN,
translation_key="update_data_fail",
) from ex
# If the vacuum is currently cleaning and it has been IMAGE_CACHE_INTERVAL
# since the last map update, you can update the map.
new_status = self.properties_api.status
if (
new_status.in_cleaning
and (dt_util.utcnow() - self._last_home_update_attempt)
> IMAGE_CACHE_INTERVAL
) or self.last_update_state != new_status.state_name:
self._last_home_update_attempt = dt_util.utcnow()
try:
await self.update_map()
except HomeAssistantError as err:
_LOGGER.debug("Failed to update map: %s", err)
if self.properties_api.status.in_cleaning:
if self._device.is_local_connected:
@@ -248,6 +258,8 @@ class RoborockDataUpdateCoordinator(DataUpdateCoordinator[DeviceState]):
else:
self.update_interval = V1_CLOUD_NOT_CLEANING_INTERVAL
self.last_update_state = self.properties_api.status.state_name
self._last_update_success_time = dt_util.utcnow()
_LOGGER.debug("Data update successful %s", self._last_update_success_time)
return DeviceState(
status=self.properties_api.status,
dnd_timer=self.properties_api.dnd,
@@ -255,6 +267,23 @@ class RoborockDataUpdateCoordinator(DataUpdateCoordinator[DeviceState]):
clean_summary=self.properties_api.clean_summary,
)
def _should_suppress_update_failure(self) -> bool:
"""Determine if we should suppress update failure reporting.
We suppress reporting update failures until a minimum duration has
passed since the last successful update. This is used to avoid reporting
the device as unavailable for short periods, a known issue.
The intent is to apply to routine background state refreshes and not
other failures such as the first update or map updates.
"""
if self._last_update_success_time is None:
# Never had a successful update, do not suppress
return False
failure_duration = dt_util.utcnow() - self._last_update_success_time
_LOGGER.debug("Update failure duration: %s", failure_duration)
return failure_duration < MIN_UNAVAILABLE_DURATION
async def get_routines(self) -> list[HomeDataScene]:
"""Get routines."""
try:

View File

@@ -1,9 +1,11 @@
"""Test for Roborock init."""
import datetime
import pathlib
from typing import Any
from unittest.mock import AsyncMock, patch
from freezegun.api import FrozenDateTimeFactory
import pytest
from roborock import (
RoborockInvalidCredentials,
@@ -12,9 +14,13 @@ from roborock import (
)
from roborock.exceptions import RoborockException
from homeassistant.components.homeassistant import (
DOMAIN as HA_DOMAIN,
SERVICE_UPDATE_ENTITY,
)
from homeassistant.components.roborock.const import DOMAIN
from homeassistant.config_entries import ConfigEntryState
from homeassistant.const import Platform
from homeassistant.const import ATTR_ENTITY_ID, Platform
from homeassistant.core import HomeAssistant
from homeassistant.helpers import issue_registry as ir
from homeassistant.helpers.device_registry import DeviceRegistry
@@ -23,7 +29,7 @@ from homeassistant.setup import async_setup_component
from .conftest import FakeDevice
from .mock_data import ROBOROCK_RRUID, USER_EMAIL
from tests.common import MockConfigEntry
from tests.common import MockConfigEntry, async_fire_time_changed
from tests.typing import ClientSessionGenerator
@@ -295,6 +301,72 @@ async def test_migrate_config_entry_unique_id(
assert config_entry.unique_id == ROBOROCK_RRUID
@pytest.mark.parametrize("platforms", [[Platform.SENSOR]])
async def test_update_unavailability_threshold(
hass: HomeAssistant,
freezer: FrozenDateTimeFactory,
setup_entry: MockConfigEntry,
fake_vacuum: FakeDevice,
) -> None:
"""Test that a small number of update failures are suppressed before marking a device unavailable."""
await async_setup_component(hass, HA_DOMAIN, {})
assert setup_entry.state is ConfigEntryState.LOADED
# We pick an arbitrary sensor to test for availability
sensor_entity_id = "sensor.roborock_s7_maxv_battery"
expected_state = "100"
state = hass.states.get(sensor_entity_id)
assert state is not None
assert state.state == expected_state
# Simulate a few update failures below the threshold
assert fake_vacuum.v1_properties is not None
fake_vacuum.v1_properties.status.refresh.side_effect = RoborockException(
"Simulated update failure"
)
# Move forward in time less than the threshold
freezer.tick(datetime.timedelta(seconds=90))
async_fire_time_changed(hass)
await hass.async_block_till_done()
# Force a coordinator refresh.
await hass.services.async_call(
HA_DOMAIN,
SERVICE_UPDATE_ENTITY,
{ATTR_ENTITY_ID: sensor_entity_id},
blocking=True,
)
await hass.async_block_till_done()
# Verify that the entity is still available
state = hass.states.get(sensor_entity_id)
assert state is not None
assert state.state == expected_state
# Move forward in time to exceed the threshold
freezer.tick(datetime.timedelta(minutes=3))
async_fire_time_changed(hass)
await hass.async_block_till_done()
# Verify that the entity is now unavailable
state = hass.states.get(sensor_entity_id)
assert state is not None
assert state.state == "unavailable"
# Now restore normal update behavior and refresh.
fake_vacuum.v1_properties.status.refresh.side_effect = None
freezer.tick(datetime.timedelta(seconds=45))
async_fire_time_changed(hass)
await hass.async_block_till_done()
# Verify that the entity recovers and is available again
state = hass.states.get(sensor_entity_id)
assert state is not None
assert state.state == expected_state
async def test_cloud_api_repair(
hass: HomeAssistant,
mock_roborock_entry: MockConfigEntry,