mirror of
https://github.com/home-assistant/supervisor.git
synced 2026-07-03 03:45:45 +01:00
f4962208b0
* watchdog: include container exit code in restart log message Reported in #6868: a user saw a tight loop of "Watchdog found app phpMyAdmin is failed, restarting..." and assumed the watchdog itself was the problem, asking whether its window could be widened. The message gives no hint that the container is exiting on its own, nor what exit code it returned. #6848 already plumbed the container exit code through DockerContainerStateEvent and added a separate log line in container_state_changed when an app exits non-zero. Build on that by forwarding event.exit_code into _restart_after_problem for apps, Home Assistant Core, and plugins, and use it in the watchdog warning when the state is FAILED. The fallback message is kept for STOPPED and UNHEALTHY where an exit code is not meaningful. After this change the example above reads "Watchdog found app phpMyAdmin exited with code 1, restarting...", making it immediately clear that the container itself is dying and giving the user a code to grep for in the add-on logs. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> * Drop unnecessary None check * tests: set exit_code on FAILED watchdog events The previous commit dropped the `exit_code is not None` guard from the watchdog `_restart_after_problem` log statements, which assumed the production invariant that `ContainerState.FAILED` always carries a non-None exit code (enforced by `docker/monitor.py` and `docker/interface.py`). Several tests, however, fired FAILED `DockerContainerStateEvent`s with no `exit_code`, causing the new unconditional `%d` formatter to raise `TypeError` at log time. Align the test fixtures with the production invariant by passing `exit_code=1` on FAILED events in the apps, Home Assistant Core, plugin base, and DNS plugin watchdog tests. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
399 lines
13 KiB
Python
399 lines
13 KiB
Python
"""Test base plugin functionality."""
|
|
|
|
from unittest.mock import ANY, Mock, PropertyMock, call, patch
|
|
|
|
from aiodocker.containers import DockerContainer
|
|
from awesomeversion import AwesomeVersion
|
|
import pytest
|
|
|
|
from supervisor.const import BusEvent, CpuArch
|
|
from supervisor.coresys import CoreSys
|
|
from supervisor.docker.const import ContainerState
|
|
from supervisor.docker.interface import DockerInterface
|
|
from supervisor.docker.manager import DockerAPI
|
|
from supervisor.docker.monitor import DockerContainerStateEvent
|
|
from supervisor.exceptions import (
|
|
AudioError,
|
|
AudioJobError,
|
|
CliError,
|
|
CliJobError,
|
|
CoreDNSError,
|
|
CoreDNSJobError,
|
|
DockerError,
|
|
MulticastError,
|
|
MulticastJobError,
|
|
ObserverError,
|
|
ObserverJobError,
|
|
PluginError,
|
|
PluginJobError,
|
|
)
|
|
from supervisor.plugins.audio import PluginAudio
|
|
from supervisor.plugins.base import PluginBase
|
|
from supervisor.plugins.cli import PluginCli
|
|
from supervisor.plugins.dns import PluginDns
|
|
from supervisor.plugins.multicast import PluginMulticast
|
|
from supervisor.plugins.observer import PluginObserver
|
|
from supervisor.utils import check_exception_chain
|
|
|
|
from tests.common import fire_bus_event
|
|
|
|
|
|
@pytest.fixture(name="plugin")
async def fixture_plugin(
    coresys: CoreSys, request: pytest.FixtureRequest
) -> PluginBase:
    """Yield the plugin instance selected by the indirect test parameter.

    ``request.param`` is compared against the known plugin classes and the
    matching attribute of ``coresys.plugins`` is yielded. For ``PluginDns`` the
    ``loop_detection`` attribute is patched for as long as the fixture is
    active.

    Raises:
        ValueError: If ``request.param`` is not one of the supported plugin
            classes.
    """
    if request.param == PluginAudio:
        yield coresys.plugins.audio
    elif request.param == PluginCli:
        yield coresys.plugins.cli
    elif request.param == PluginDns:
        with patch.object(PluginDns, "loop_detection"):
            yield coresys.plugins.dns
    elif request.param == PluginMulticast:
        yield coresys.plugins.multicast
    elif request.param == PluginObserver:
        yield coresys.plugins.observer
    else:
        # Fail loudly rather than finishing without a yield, which would only
        # surface as an opaque fixture setup error.
        raise ValueError(f"Unsupported plugin type for fixture: {request.param!r}")
|
|
|
|
|
|
@pytest.mark.parametrize(
    "plugin",
    [PluginAudio, PluginCli, PluginDns, PluginMulticast, PluginObserver],
    indirect=True,
)
async def test_plugin_watchdog(coresys: CoreSys, plugin: PluginBase) -> None:
    """Verify which container state events make the plugin watchdog rebuild."""

    async def fire_state_change(state: ContainerState, *, name=None, **fields):
        """Fire a state change event, defaulting to this plugin's container."""
        await fire_bus_event(
            coresys,
            BusEvent.DOCKER_CONTAINER_STATE_CHANGE,
            DockerContainerStateEvent(
                name=plugin.instance.name if name is None else name,
                state=state,
                id="abc123",
                time=1,
                **fields,
            ),
        )

    instance_cls = type(plugin.instance)
    plugin_cls = type(plugin)

    with (
        patch.object(instance_cls, "attach"),
        patch.object(instance_cls, "is_running", return_value=True),
    ):
        await plugin.load()

    with (
        patch.object(plugin_cls, "rebuild") as rebuild_mock,
        patch.object(plugin_cls, "start") as start_mock,
        patch.object(instance_cls, "current_state") as current_state_mock,
    ):
        # An unhealthy container leads to a rebuild, not a plain start
        current_state_mock.return_value = ContainerState.UNHEALTHY
        await fire_state_change(ContainerState.UNHEALTHY)
        rebuild_mock.assert_called_once()
        start_mock.assert_not_called()

        # A failed container (with exit code) leads to a rebuild as well
        rebuild_mock.reset_mock()
        current_state_mock.return_value = ContainerState.FAILED
        await fire_state_change(ContainerState.FAILED, exit_code=1)
        rebuild_mock.assert_called_once()
        start_mock.assert_not_called()

        rebuild_mock.reset_mock()
        # Stop should be ignored as it means an update or system shutdown,
        # plugins don't stop otherwise
        current_state_mock.return_value = ContainerState.STOPPED
        await fire_state_change(ContainerState.STOPPED)
        rebuild_mock.assert_not_called()
        start_mock.assert_not_called()

        # Do not process event if container state has changed since fired
        current_state_mock.return_value = ContainerState.HEALTHY
        await fire_state_change(ContainerState.FAILED, exit_code=1)
        rebuild_mock.assert_not_called()
        start_mock.assert_not_called()

        # Other containers ignored
        await fire_state_change(ContainerState.UNHEALTHY, name="addon_local_other")
        rebuild_mock.assert_not_called()
        start_mock.assert_not_called()
|
|
|
|
|
|
@pytest.mark.parametrize(
    ("plugin", "error"),
    [
        (PluginAudio, AudioError()),
        (PluginCli, CliError()),
        (PluginDns, CoreDNSError()),
        (PluginMulticast, MulticastError()),
        (PluginObserver, ObserverError()),
    ],
    indirect=["plugin"],
)
@pytest.mark.usefixtures("coresys", "tmp_supervisor_data", "path_extern")
async def test_plugin_watchdog_max_failed_attempts(
    capture_exception: Mock,
    plugin: PluginBase,
    error: PluginError,
    container: DockerContainer,
    caplog: pytest.LogCaptureFixture,
) -> None:
    """Verify the watchdog stops retrying after five failing start attempts."""
    with patch.object(type(plugin.instance), "attach"):
        await plugin.load()

    # Make the mocked container report as stopped with a non-zero exit code
    container_state = container.show.return_value["State"]
    container_state["Status"] = "stopped"
    container_state["Running"] = False
    container_state["ExitCode"] = 1

    with (
        patch("supervisor.plugins.base.WATCHDOG_RETRY_SECONDS", 0),
        patch.object(type(plugin), "start", side_effect=error) as start_mock,
    ):
        failed_event = DockerContainerStateEvent(
            name=plugin.instance.name,
            state=ContainerState.FAILED,
            id="abc123",
            time=1,
            exit_code=1,
        )
        await plugin.watchdog_container(failed_event)
        assert start_mock.call_count == 5

    capture_exception.assert_called_with(error)
    expected_log = (
        f"Watchdog cannot restart {plugin.slug} plugin, failed all 5 attempts"
    )
    assert expected_log in caplog.text
|
|
|
|
|
|
@pytest.mark.parametrize(
    "plugin",
    [PluginAudio, PluginCli, PluginDns, PluginMulticast, PluginObserver],
    indirect=True,
)
async def test_plugin_load_running_container(
    coresys: CoreSys, plugin: PluginBase
) -> None:
    """Verify load attaches to a running container without install or start."""
    expected_version = AwesomeVersion("2022.7.3")
    plugin_cls = type(plugin)
    instance_cls = type(plugin.instance)

    with (
        patch.object(type(coresys.bus), "register_event") as register_event_mock,
        patch.object(instance_cls, "attach") as attach_mock,
        patch.object(plugin_cls, "install") as install_mock,
        patch.object(plugin_cls, "start") as start_mock,
        patch.object(
            instance_cls, "get_latest_version", return_value=expected_version
        ),
        patch.object(instance_cls, "is_running", return_value=True),
    ):
        await plugin.load()

        # The watchdog must be subscribed to container state changes
        register_event_mock.assert_any_call(
            BusEvent.DOCKER_CONTAINER_STATE_CHANGE, plugin.watchdog_container
        )
        attach_mock.assert_called_once_with(
            version=expected_version, skip_state_event_if_down=True
        )
        install_mock.assert_not_called()
        start_mock.assert_not_called()
|
|
|
|
|
|
@pytest.mark.parametrize(
    "plugin",
    [PluginAudio, PluginCli, PluginDns, PluginMulticast, PluginObserver],
    indirect=True,
)
async def test_plugin_load_stopped_container(
    coresys: CoreSys, plugin: PluginBase
) -> None:
    """Verify load attaches to an existing stopped container and starts it."""
    expected_version = AwesomeVersion("2022.7.3")
    plugin_cls = type(plugin)
    instance_cls = type(plugin.instance)

    with (
        patch.object(type(coresys.bus), "register_event") as register_event_mock,
        patch.object(instance_cls, "attach") as attach_mock,
        patch.object(plugin_cls, "install") as install_mock,
        patch.object(plugin_cls, "start") as start_mock,
        patch.object(
            instance_cls, "get_latest_version", return_value=expected_version
        ),
        patch.object(instance_cls, "is_running", return_value=False),
    ):
        await plugin.load()

        # The watchdog must be subscribed to container state changes
        register_event_mock.assert_any_call(
            BusEvent.DOCKER_CONTAINER_STATE_CHANGE, plugin.watchdog_container
        )
        attach_mock.assert_called_once_with(
            version=expected_version, skip_state_event_if_down=True
        )
        install_mock.assert_not_called()
        start_mock.assert_called_once()
|
|
|
|
|
|
@pytest.mark.parametrize(
    "plugin",
    [PluginAudio, PluginCli, PluginDns, PluginMulticast, PluginObserver],
    indirect=True,
)
async def test_plugin_load_missing_container(
    coresys: CoreSys, plugin: PluginBase
) -> None:
    """Verify load installs and starts the plugin when attach raises."""
    expected_version = AwesomeVersion("2022.7.3")
    plugin_cls = type(plugin)
    instance_cls = type(plugin.instance)

    with (
        patch.object(type(coresys.bus), "register_event") as register_event_mock,
        # Attaching fails with a DockerError in this scenario
        patch.object(instance_cls, "attach", side_effect=DockerError()) as attach_mock,
        patch.object(plugin_cls, "install") as install_mock,
        patch.object(plugin_cls, "start") as start_mock,
        patch.object(
            instance_cls, "get_latest_version", return_value=expected_version
        ),
        patch.object(instance_cls, "is_running", return_value=False),
    ):
        await plugin.load()

        # The watchdog must be subscribed to container state changes
        register_event_mock.assert_any_call(
            BusEvent.DOCKER_CONTAINER_STATE_CHANGE, plugin.watchdog_container
        )
        attach_mock.assert_called_once_with(
            version=expected_version, skip_state_event_if_down=True
        )
        install_mock.assert_called_once()
        start_mock.assert_called_once()
|
|
|
|
|
|
@pytest.mark.parametrize(
    ("plugin", "error"),
    [
        (PluginAudio, AudioJobError),
        (PluginCli, CliJobError),
        (PluginDns, CoreDNSJobError),
        (PluginMulticast, MulticastJobError),
        (PluginObserver, ObserverJobError),
    ],
    indirect=["plugin"],
)
async def test_update_fails_if_out_of_date(
    coresys: CoreSys, plugin: PluginBase, error: PluginJobError
):
    """Verify a plugin update raises its job error when supervisor needs an update."""
    # Stub the free disk space lookup with a fixed value
    coresys.hardware.disk.get_disk_free_space = lambda x: 5000

    supervisor_outdated = patch.object(
        type(coresys.supervisor), "need_update", new=PropertyMock(return_value=True)
    )
    with supervisor_outdated, pytest.raises(error):
        await plugin.update()
|
|
|
|
|
|
@pytest.mark.parametrize(
    "plugin",
    [PluginAudio, PluginCli, PluginDns, PluginMulticast, PluginObserver],
    indirect=True,
)
@pytest.mark.usefixtures("coresys")
async def test_repair_failed(capture_exception: Mock, plugin: PluginBase):
    """Verify a failing install during repair is captured as a DockerError."""
    amd64_arch = PropertyMock(return_value=CpuArch.AMD64)

    with (
        patch.object(DockerInterface, "exists", return_value=False),
        patch.object(DockerInterface, "arch", new=amd64_arch),
        patch.object(DockerInterface, "install", side_effect=DockerError),
    ):
        await plugin.repair()

    capture_exception.assert_called_once()
    # First positional argument of the single capture call is the exception
    captured_error = capture_exception.call_args[0][0]
    assert check_exception_chain(captured_error, DockerError)
|
|
|
|
|
|
@pytest.mark.parametrize(
    "plugin",
    [PluginAudio, PluginCli, PluginDns, PluginMulticast, PluginObserver],
    indirect=True,
)
async def test_load_with_incorrect_image(
    coresys: CoreSys, container: DockerContainer, plugin: PluginBase
):
    """Verify load swaps a plugin on the wrong image over to the correct one."""
    old_image = f"ghcr.io/home-assistant/aarch64-hassio-{plugin.slug}"
    plugin.image = old_image
    correct_image = f"ghcr.io/home-assistant/amd64-hassio-{plugin.slug}"
    coresys.updater._data["image"][plugin.slug] = correct_image  # pylint: disable=protected-access
    plugin.version = AwesomeVersion("2024.4.0")

    container_state = container.show.return_value["State"]
    container_state["Status"] = "running"
    container_state["Running"] = True

    # Image inspect data and container data both carry the version label
    img_data = coresys.docker.images.inspect.return_value | {
        "Config": {"Labels": {"io.hass.version": "2024.4.0"}}
    }
    coresys.docker.images.inspect.return_value = img_data
    container.show.return_value |= img_data

    with patch.object(DockerAPI, "pull_image", return_value=img_data) as pull_mock:
        await plugin.load()
        pull_mock.assert_called_once_with(
            ANY, correct_image, "2024.4.0", platform="linux/amd64", auth=None
        )

    container.delete.assert_called_once_with(force=True, v=True)

    # The old image is deleted under both its latest and its version tag
    delete_calls = coresys.docker.images.delete.call_args_list
    assert delete_calls[0] == call(f"{old_image}:latest", force=True)
    assert delete_calls[1] == call(f"{old_image}:2024.4.0", force=True)
    assert plugin.image == correct_image
|
|
|
|
|
|
@pytest.mark.parametrize(
    "plugin",
    [PluginAudio, PluginCli, PluginDns, PluginMulticast, PluginObserver],
    indirect=True,
)
async def test_default_image_fallback(coresys: CoreSys, plugin: PluginBase):
    """Verify default image uses the hard-coded fallback without a version file."""
    # The updater has no image recorded for this plugin
    updater_image = getattr(coresys.updater, f"image_{plugin.slug}")
    assert updater_image is None

    fallback_image = f"ghcr.io/home-assistant/amd64-hassio-{plugin.slug}"
    assert plugin.default_image == fallback_image
|