1
0
mirror of https://github.com/home-assistant/supervisor.git synced 2026-07-03 03:45:45 +01:00
Files
supervisor/tests/plugins/test_plugin_base.py
T
Stefan Agner f4962208b0 watchdog: include container exit code in restart log message (#6873)
* watchdog: include container exit code in restart log message

Reported in #6868: a user saw a tight loop of "Watchdog found app
phpMyAdmin is failed, restarting..." and assumed the watchdog itself
was the problem, asking whether its window could be widened. The
message gives no hint that the container is exiting on its own, nor
what exit code it returned.

#6848 already plumbed the container exit code through
DockerContainerStateEvent and added a separate log line in
container_state_changed when an app exits non-zero. Build on that by
forwarding event.exit_code into _restart_after_problem for apps, Home
Assistant Core, and plugins, and use it in the watchdog warning when
the state is FAILED. The fallback message is kept for STOPPED and
UNHEALTHY where an exit code is not meaningful.

After this change the example above reads "Watchdog found app
phpMyAdmin exited with code 1, restarting...", making it immediately
clear that the container itself is dying and giving the user a code
to grep for in the add-on logs.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* Drop unnecessary None check

* tests: set exit_code on FAILED watchdog events

The previous commit dropped the `exit_code is not None` guard from the
watchdog `_restart_after_problem` log statements, which assumed the
production invariant that `ContainerState.FAILED` always carries a
non-None exit code (enforced by `docker/monitor.py` and
`docker/interface.py`). Several tests, however, fired FAILED
`DockerContainerStateEvent`s with no `exit_code`, causing the new
unconditional `%d` formatter to raise `TypeError` at log time.

Align the test fixtures with the production invariant by passing
`exit_code=1` on FAILED events in the apps, Home Assistant Core, plugin
base, and DNS plugin watchdog tests.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-26 15:58:54 +02:00

399 lines
13 KiB
Python

"""Test base plugin functionality."""
from unittest.mock import ANY, Mock, PropertyMock, call, patch
from aiodocker.containers import DockerContainer
from awesomeversion import AwesomeVersion
import pytest
from supervisor.const import BusEvent, CpuArch
from supervisor.coresys import CoreSys
from supervisor.docker.const import ContainerState
from supervisor.docker.interface import DockerInterface
from supervisor.docker.manager import DockerAPI
from supervisor.docker.monitor import DockerContainerStateEvent
from supervisor.exceptions import (
AudioError,
AudioJobError,
CliError,
CliJobError,
CoreDNSError,
CoreDNSJobError,
DockerError,
MulticastError,
MulticastJobError,
ObserverError,
ObserverJobError,
PluginError,
PluginJobError,
)
from supervisor.plugins.audio import PluginAudio
from supervisor.plugins.base import PluginBase
from supervisor.plugins.cli import PluginCli
from supervisor.plugins.dns import PluginDns
from supervisor.plugins.multicast import PluginMulticast
from supervisor.plugins.observer import PluginObserver
from supervisor.utils import check_exception_chain
from tests.common import fire_bus_event
@pytest.fixture(name="plugin")
async def fixture_plugin(
    coresys: CoreSys, request: pytest.FixtureRequest
) -> PluginBase:
    """Yield the coresys plugin that matches the parametrized plugin class."""
    requested = request.param

    if requested == PluginDns:
        # Loop detection stays mocked for as long as the test holds the DNS plugin
        with patch.object(PluginDns, "loop_detection"):
            yield coresys.plugins.dns
        return

    # The remaining plugins need no extra patching, only the matching attribute
    for plugin_type, attribute in (
        (PluginAudio, "audio"),
        (PluginCli, "cli"),
        (PluginMulticast, "multicast"),
        (PluginObserver, "observer"),
    ):
        if requested == plugin_type:
            yield getattr(coresys.plugins, attribute)
            return
@pytest.mark.parametrize(
    "plugin",
    [PluginAudio, PluginCli, PluginDns, PluginMulticast, PluginObserver],
    indirect=True,
)
async def test_plugin_watchdog(coresys: CoreSys, plugin: PluginBase) -> None:
    """Test plugin watchdog works correctly.

    Fires container state change events on the bus and checks whether the
    plugin gets rebuilt in response. ``start`` is never expected to be called.
    """

    async def fire_state_event(
        name: str, state: ContainerState, **event_kwargs: int
    ) -> None:
        """Fire a container state change event for the container called name."""
        await fire_bus_event(
            coresys,
            BusEvent.DOCKER_CONTAINER_STATE_CHANGE,
            DockerContainerStateEvent(
                name=name, state=state, id="abc123", time=1, **event_kwargs
            ),
        )

    with (
        patch.object(type(plugin.instance), "attach"),
        patch.object(type(plugin.instance), "is_running", return_value=True),
    ):
        await plugin.load()

    with (
        patch.object(type(plugin), "rebuild") as rebuild,
        patch.object(type(plugin), "start") as start,
        patch.object(type(plugin.instance), "current_state") as current_state,
    ):
        # Unhealthy container is rebuilt
        current_state.return_value = ContainerState.UNHEALTHY
        await fire_state_event(plugin.instance.name, ContainerState.UNHEALTHY)
        rebuild.assert_called_once()
        start.assert_not_called()

        # Failed container is rebuilt as well
        rebuild.reset_mock()
        current_state.return_value = ContainerState.FAILED
        await fire_state_event(
            plugin.instance.name, ContainerState.FAILED, exit_code=1
        )
        rebuild.assert_called_once()
        start.assert_not_called()

        rebuild.reset_mock()
        # Stop should be ignored as it means an update or system shutdown, plugins don't stop otherwise
        current_state.return_value = ContainerState.STOPPED
        await fire_state_event(plugin.instance.name, ContainerState.STOPPED)
        rebuild.assert_not_called()
        start.assert_not_called()

        # Do not process event if container state has changed since fired
        current_state.return_value = ContainerState.HEALTHY
        await fire_state_event(
            plugin.instance.name, ContainerState.FAILED, exit_code=1
        )
        rebuild.assert_not_called()
        start.assert_not_called()

        # Other containers ignored. Report a current state that matches the
        # event so that only the container name can cause it to be skipped;
        # with the HEALTHY state left over from the previous step this check
        # would pass even without any name filtering.
        current_state.return_value = ContainerState.UNHEALTHY
        await fire_state_event("addon_local_other", ContainerState.UNHEALTHY)
        rebuild.assert_not_called()
        start.assert_not_called()
@pytest.mark.parametrize(
    ("plugin", "error"),
    [
        (PluginAudio, AudioError()),
        (PluginCli, CliError()),
        (PluginDns, CoreDNSError()),
        (PluginMulticast, MulticastError()),
        (PluginObserver, ObserverError()),
    ],
    indirect=["plugin"],
)
@pytest.mark.usefixtures("coresys", "tmp_supervisor_data", "path_extern")
async def test_plugin_watchdog_max_failed_attempts(
    capture_exception: Mock,
    plugin: PluginBase,
    error: PluginError,
    container: DockerContainer,
    caplog: pytest.LogCaptureFixture,
) -> None:
    """Test the watchdog stops after five failing start attempts and logs it."""
    with patch.object(type(plugin.instance), "attach"):
        await plugin.load()

    # Container is reported as stopped with a non-zero exit code
    docker_state = container.show.return_value["State"]
    docker_state["Status"] = "stopped"
    docker_state["Running"] = False
    docker_state["ExitCode"] = 1

    failed_event = DockerContainerStateEvent(
        name=plugin.instance.name,
        state=ContainerState.FAILED,
        id="abc123",
        time=1,
        exit_code=1,
    )
    no_retry_delay = patch("supervisor.plugins.base.WATCHDOG_RETRY_SECONDS", 0)
    # Every start attempt raises the plugin specific error
    failing_start = patch.object(type(plugin), "start", side_effect=error)
    with no_retry_delay, failing_start as start:
        await plugin.watchdog_container(failed_event)

    assert start.call_count == 5
    capture_exception.assert_called_with(error)
    expected_log = (
        f"Watchdog cannot restart {plugin.slug} plugin, failed all 5 attempts"
    )
    assert expected_log in caplog.text
@pytest.mark.parametrize(
    "plugin",
    [PluginAudio, PluginCli, PluginDns, PluginMulticast, PluginObserver],
    indirect=True,
)
async def test_plugin_load_running_container(
    coresys: CoreSys, plugin: PluginBase
) -> None:
    """Test loading attaches to a running container without install or start."""
    latest_version = AwesomeVersion("2022.7.3")
    plugin_type = type(plugin)
    instance_type = type(plugin.instance)

    with (
        patch.object(type(coresys.bus), "register_event") as register_event,
        patch.object(instance_type, "attach") as attach,
        patch.object(plugin_type, "install") as install,
        patch.object(plugin_type, "start") as start,
        patch.object(
            instance_type, "get_latest_version", return_value=latest_version
        ),
        patch.object(instance_type, "is_running", return_value=True),
    ):
        await plugin.load()

        # Watchdog is hooked up to container state changes
        register_event.assert_any_call(
            BusEvent.DOCKER_CONTAINER_STATE_CHANGE, plugin.watchdog_container
        )
        attach.assert_called_once_with(
            version=latest_version, skip_state_event_if_down=True
        )
        install.assert_not_called()
        start.assert_not_called()
@pytest.mark.parametrize(
    "plugin",
    [PluginAudio, PluginCli, PluginDns, PluginMulticast, PluginObserver],
    indirect=True,
)
async def test_plugin_load_stopped_container(
    coresys: CoreSys, plugin: PluginBase
) -> None:
    """Test loading attaches to an existing, not running container and starts it."""
    latest_version = AwesomeVersion("2022.7.3")
    plugin_type = type(plugin)
    instance_type = type(plugin.instance)

    with (
        patch.object(type(coresys.bus), "register_event") as register_event,
        patch.object(instance_type, "attach") as attach,
        patch.object(plugin_type, "install") as install,
        patch.object(plugin_type, "start") as start,
        patch.object(
            instance_type, "get_latest_version", return_value=latest_version
        ),
        patch.object(instance_type, "is_running", return_value=False),
    ):
        await plugin.load()

        # Watchdog is hooked up to container state changes
        register_event.assert_any_call(
            BusEvent.DOCKER_CONTAINER_STATE_CHANGE, plugin.watchdog_container
        )
        attach.assert_called_once_with(
            version=latest_version, skip_state_event_if_down=True
        )
        install.assert_not_called()
        start.assert_called_once()
@pytest.mark.parametrize(
    "plugin",
    [PluginAudio, PluginCli, PluginDns, PluginMulticast, PluginObserver],
    indirect=True,
)
async def test_plugin_load_missing_container(
    coresys: CoreSys, plugin: PluginBase
) -> None:
    """Test loading installs and starts the plugin when attaching fails."""
    latest_version = AwesomeVersion("2022.7.3")
    plugin_type = type(plugin)
    instance_type = type(plugin.instance)

    with (
        patch.object(type(coresys.bus), "register_event") as register_event,
        # Attaching raises a DockerError, so load has to install the plugin
        patch.object(instance_type, "attach", side_effect=DockerError()) as attach,
        patch.object(plugin_type, "install") as install,
        patch.object(plugin_type, "start") as start,
        patch.object(
            instance_type, "get_latest_version", return_value=latest_version
        ),
        patch.object(instance_type, "is_running", return_value=False),
    ):
        await plugin.load()

        # Watchdog is hooked up to container state changes
        register_event.assert_any_call(
            BusEvent.DOCKER_CONTAINER_STATE_CHANGE, plugin.watchdog_container
        )
        attach.assert_called_once_with(
            version=latest_version, skip_state_event_if_down=True
        )
        install.assert_called_once()
        start.assert_called_once()
@pytest.mark.parametrize(
    ("plugin", "error"),
    [
        (PluginAudio, AudioJobError),
        (PluginCli, CliJobError),
        (PluginDns, CoreDNSJobError),
        (PluginMulticast, MulticastJobError),
        (PluginObserver, ObserverJobError),
    ],
    indirect=["plugin"],
)
async def test_update_fails_if_out_of_date(
    coresys: CoreSys, plugin: PluginBase, error: type[PluginJobError]
) -> None:
    """Test update of plugins fail when supervisor is out of date.

    ``error`` is the plugin specific job exception class (not an instance)
    that ``plugin.update()`` is expected to raise.
    """
    # Stub the free disk space lookup with a fixed value.
    # NOTE(review): presumably keeps a free-space check from failing before
    # the supervisor version check is reached - confirm against the job conditions.
    coresys.hardware.disk.get_disk_free_space = lambda x: 5000
    with (
        # Pretend the supervisor itself has a pending update
        patch.object(
            type(coresys.supervisor), "need_update", new=PropertyMock(return_value=True)
        ),
        pytest.raises(error),
    ):
        await plugin.update()
@pytest.mark.parametrize(
    "plugin",
    [PluginAudio, PluginCli, PluginDns, PluginMulticast, PluginObserver],
    indirect=True,
)
@pytest.mark.usefixtures("coresys")
async def test_repair_failed(capture_exception: Mock, plugin: PluginBase):
    """Test a failing install during repair is captured exactly once."""
    image_missing = patch.object(DockerInterface, "exists", return_value=False)
    amd64_arch = patch.object(
        DockerInterface, "arch", new=PropertyMock(return_value=CpuArch.AMD64)
    )
    install_fails = patch.object(DockerInterface, "install", side_effect=DockerError)

    with image_missing, amd64_arch, install_fails:
        await plugin.repair()

    capture_exception.assert_called_once()
    # First positional argument of the capture call is the reported exception
    reported = capture_exception.call_args[0][0]
    assert check_exception_chain(reported, DockerError)
@pytest.mark.parametrize(
    "plugin",
    [PluginAudio, PluginCli, PluginDns, PluginMulticast, PluginObserver],
    indirect=True,
)
async def test_load_with_incorrect_image(
    coresys: CoreSys, container: DockerContainer, plugin: PluginBase
):
    """Test loading a plugin set to the wrong image pulls the correct one and drops the old."""
    old_image = f"ghcr.io/home-assistant/aarch64-hassio-{plugin.slug}"
    plugin.image = old_image
    correct_image = f"ghcr.io/home-assistant/amd64-hassio-{plugin.slug}"
    coresys.updater._data["image"][plugin.slug] = correct_image  # pylint: disable=protected-access
    plugin.version = AwesomeVersion("2024.4.0")

    # Container is reported as running
    container_state = container.show.return_value["State"]
    container_state["Status"] = "running"
    container_state["Running"] = True

    # Image inspection carries the version label, the container shows the same data
    version_label = {"Config": {"Labels": {"io.hass.version": "2024.4.0"}}}
    img_data = coresys.docker.images.inspect.return_value | version_label
    coresys.docker.images.inspect.return_value = img_data
    container.show.return_value |= img_data

    with patch.object(DockerAPI, "pull_image", return_value=img_data) as pull_image:
        await plugin.load()

        pull_image.assert_called_once_with(
            ANY, correct_image, "2024.4.0", platform="linux/amd64", auth=None
        )
        container.delete.assert_called_once_with(force=True, v=True)

        # Both tags of the old image are removed, latest first
        removed_images = coresys.docker.images.delete.call_args_list
        assert removed_images[0] == call(
            f"{old_image}:latest",
            force=True,
        )
        assert removed_images[1] == call(
            f"{old_image}:2024.4.0",
            force=True,
        )
        assert plugin.image == correct_image
@pytest.mark.parametrize(
    "plugin",
    [PluginAudio, PluginCli, PluginDns, PluginMulticast, PluginObserver],
    indirect=True,
)
async def test_default_image_fallback(coresys: CoreSys, plugin: PluginBase):
    """Test the default image is the hard-coded one when the updater has no image."""
    slug = plugin.slug

    # Updater holds no image for this plugin
    updater_image = getattr(coresys.updater, f"image_{slug}")
    assert updater_image is None

    expected_default = f"ghcr.io/home-assistant/amd64-hassio-{slug}"
    assert plugin.default_image == expected_default