mirror of
https://github.com/home-assistant/supervisor.git
synced 2026-07-02 11:25:37 +01:00
f4962208b0
* watchdog: include container exit code in restart log message Reported in #6868: a user saw a tight loop of "Watchdog found app phpMyAdmin is failed, restarting..." and assumed the watchdog itself was the problem, asking whether its window could be widened. The message gives no hint that the container is exiting on its own, nor what exit code it returned. #6848 already plumbed the container exit code through DockerContainerStateEvent and added a separate log line in container_state_changed when an app exits non-zero. Build on that by forwarding event.exit_code into _restart_after_problem for apps, Home Assistant Core, and plugins, and use it in the watchdog warning when the state is FAILED. The fallback message is kept for STOPPED and UNHEALTHY where an exit code is not meaningful. After this change the example above reads "Watchdog found app phpMyAdmin exited with code 1, restarting...", making it immediately clear that the container itself is dying and giving the user a code to grep for in the add-on logs. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> * Drop unnecessary None check * tests: set exit_code on FAILED watchdog events The previous commit dropped the `exit_code is not None` guard from the watchdog `_restart_after_problem` log statements, which assumed the production invariant that `ContainerState.FAILED` always carries a non-None exit code (enforced by `docker/monitor.py` and `docker/interface.py`). Several tests, however, fired FAILED `DockerContainerStateEvent`s with no `exit_code`, causing the new unconditional `%d` formatter to raise `TypeError` at log time. Align the test fixtures with the production invariant by passing `exit_code=1` on FAILED events in the apps, Home Assistant Core, plugin base, and DNS plugin watchdog tests. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
475 lines
16 KiB
Python
475 lines
16 KiB
Python
"""Test DNS plugin."""
|
|
|
|
import asyncio
|
|
import errno
|
|
from ipaddress import IPv4Address
|
|
from pathlib import Path
|
|
from unittest.mock import AsyncMock, Mock, patch
|
|
|
|
from aiodocker.containers import DockerContainer
|
|
import pytest
|
|
|
|
from supervisor.const import BusEvent, LogLevel
|
|
from supervisor.coresys import CoreSys
|
|
from supervisor.docker.const import ContainerState
|
|
from supervisor.docker.dns import DockerDNS
|
|
from supervisor.docker.monitor import DockerContainerStateEvent
|
|
from supervisor.plugins.dns import HostEntry, PluginDns
|
|
from supervisor.resolution.const import ContextType, IssueType, SuggestionType
|
|
from supervisor.resolution.data import Issue, Suggestion
|
|
|
|
|
|
@pytest.fixture(name="docker_interface")
|
|
async def fixture_docker_interface() -> tuple[AsyncMock, AsyncMock]:
|
|
"""Mock docker interface methods."""
|
|
with (
|
|
patch.object(DockerDNS, "run") as run,
|
|
patch.object(DockerDNS, "restart") as restart,
|
|
):
|
|
yield (run, restart)
|
|
|
|
|
|
@pytest.fixture(name="write_json")
|
|
async def fixture_write_json() -> Mock:
|
|
"""Mock json file writer."""
|
|
with patch("supervisor.plugins.dns.write_json_file") as write_json_file:
|
|
yield write_json_file
|
|
|
|
|
|
@pytest.fixture(name="mock_call_later")
|
|
def fixture_mock_call_later(coresys: CoreSys):
|
|
"""Mock sys_call_later with zero delay for testing."""
|
|
|
|
def mock_call_later(_delay, *args, **kwargs) -> asyncio.TimerHandle:
|
|
"""Mock to remove delay."""
|
|
return coresys.call_later(0, *args, **kwargs)
|
|
|
|
return mock_call_later
|
|
|
|
|
|
async def test_config_write(
|
|
coresys: CoreSys,
|
|
docker_interface: tuple[AsyncMock, AsyncMock],
|
|
write_json: Mock,
|
|
):
|
|
"""Test config write on DNS start and restart."""
|
|
assert coresys.plugins.dns.locals == ["dns://192.168.30.1"]
|
|
coresys.plugins.dns.servers = ["dns://1.1.1.1", "dns://8.8.8.8"]
|
|
|
|
await coresys.plugins.dns.start()
|
|
docker_interface[0].assert_called_once()
|
|
docker_interface[1].assert_not_called()
|
|
|
|
write_json.assert_called_once_with(
|
|
Path("/data/dns/coredns.json"),
|
|
{
|
|
"servers": ["dns://1.1.1.1", "dns://8.8.8.8"],
|
|
"locals": ["dns://192.168.30.1"],
|
|
"fallback": True,
|
|
"debug": False,
|
|
},
|
|
)
|
|
|
|
docker_interface[0].reset_mock()
|
|
write_json.reset_mock()
|
|
coresys.plugins.dns.servers = ["dns://8.8.8.8"]
|
|
coresys.plugins.dns.fallback = False
|
|
coresys.config.logging = LogLevel.DEBUG
|
|
|
|
await coresys.plugins.dns.restart()
|
|
docker_interface[0].assert_not_called()
|
|
docker_interface[1].assert_called_once()
|
|
|
|
write_json.assert_called_once_with(
|
|
Path("/data/dns/coredns.json"),
|
|
{
|
|
"servers": ["dns://8.8.8.8"],
|
|
"locals": ["dns://192.168.30.1"],
|
|
"fallback": False,
|
|
"debug": True,
|
|
},
|
|
)
|
|
|
|
|
|
async def test_reset(coresys: CoreSys):
|
|
"""Test reset returns dns plugin to defaults."""
|
|
coresys.plugins.dns.servers = ["dns://1.1.1.1", "dns://8.8.8.8"]
|
|
coresys.plugins.dns.fallback = False
|
|
coresys.plugins.dns._loop = True # pylint: disable=protected-access
|
|
assert len(coresys.apps.installed) == 0
|
|
|
|
with (
|
|
patch.object(type(coresys.plugins.dns.hosts), "unlink") as unlink,
|
|
patch.object(type(coresys.plugins.dns), "write_hosts") as write_hosts,
|
|
):
|
|
await coresys.plugins.dns.reset()
|
|
|
|
assert coresys.plugins.dns.servers == []
|
|
assert coresys.plugins.dns.fallback is True
|
|
assert coresys.plugins.dns._loop is False # pylint: disable=protected-access
|
|
unlink.assert_called_once()
|
|
write_hosts.assert_called_once()
|
|
|
|
# Verify the hosts data structure is properly initialized
|
|
# pylint: disable=protected-access
|
|
assert coresys.plugins.dns._hosts == [
|
|
HostEntry(
|
|
ip_address=IPv4Address("127.0.0.1"),
|
|
names=["localhost", "localhost.local.hass.io"],
|
|
),
|
|
HostEntry(
|
|
ip_address=IPv4Address("172.30.32.2"),
|
|
names=[
|
|
"hassio",
|
|
"hassio.local.hass.io",
|
|
"supervisor",
|
|
"supervisor.local.hass.io",
|
|
],
|
|
),
|
|
HostEntry(
|
|
ip_address=IPv4Address("172.30.32.1"),
|
|
names=[
|
|
"homeassistant",
|
|
"homeassistant.local.hass.io",
|
|
"home-assistant",
|
|
"home-assistant.local.hass.io",
|
|
],
|
|
),
|
|
HostEntry(
|
|
ip_address=IPv4Address("172.30.32.3"),
|
|
names=["dns", "dns.local.hass.io"],
|
|
),
|
|
HostEntry(
|
|
ip_address=IPv4Address("172.30.32.6"),
|
|
names=["observer", "observer.local.hass.io"],
|
|
),
|
|
]
|
|
|
|
|
|
async def test_loop_detection_on_failure(coresys: CoreSys, container: DockerContainer):
|
|
"""Test loop detection when coredns fails."""
|
|
assert len(coresys.resolution.issues) == 0
|
|
assert len(coresys.resolution.suggestions) == 0
|
|
|
|
with (
|
|
patch.object(DockerDNS, "attach"),
|
|
patch.object(DockerDNS, "is_running", return_value=True),
|
|
):
|
|
await coresys.plugins.dns.load()
|
|
|
|
with (
|
|
patch.object(PluginDns, "rebuild") as rebuild,
|
|
patch.object(
|
|
DockerDNS,
|
|
"current_state",
|
|
side_effect=[
|
|
ContainerState.FAILED,
|
|
ContainerState.FAILED,
|
|
],
|
|
),
|
|
):
|
|
coresys.bus.fire_event(
|
|
BusEvent.DOCKER_CONTAINER_STATE_CHANGE,
|
|
DockerContainerStateEvent(
|
|
name="hassio_dns",
|
|
state=ContainerState.FAILED,
|
|
id="abc123",
|
|
time=1,
|
|
exit_code=1,
|
|
),
|
|
)
|
|
await asyncio.sleep(0)
|
|
assert len(coresys.resolution.issues) == 0
|
|
assert len(coresys.resolution.suggestions) == 0
|
|
rebuild.assert_called_once()
|
|
|
|
rebuild.reset_mock()
|
|
container.log.return_value = ["plugin/loop: Loop"]
|
|
coresys.bus.fire_event(
|
|
BusEvent.DOCKER_CONTAINER_STATE_CHANGE,
|
|
DockerContainerStateEvent(
|
|
name="hassio_dns",
|
|
state=ContainerState.FAILED,
|
|
id="abc123",
|
|
time=1,
|
|
exit_code=1,
|
|
),
|
|
)
|
|
await asyncio.sleep(0)
|
|
assert coresys.resolution.issues == [
|
|
Issue(IssueType.DNS_LOOP, ContextType.PLUGIN, "dns")
|
|
]
|
|
assert coresys.resolution.suggestions == [
|
|
Suggestion(SuggestionType.EXECUTE_RESET, ContextType.PLUGIN, "dns")
|
|
]
|
|
rebuild.assert_called_once()
|
|
|
|
|
|
async def test_load_error(
|
|
coresys: CoreSys, caplog: pytest.LogCaptureFixture, container
|
|
):
|
|
"""Test error reading config files during load."""
|
|
with (
|
|
patch("supervisor.plugins.dns.Path.read_text", side_effect=(err := OSError())),
|
|
patch("supervisor.plugins.dns.Path.write_text", side_effect=err),
|
|
):
|
|
err.errno = errno.EBUSY
|
|
await coresys.plugins.dns.load()
|
|
|
|
assert "Can't read resolve.tmpl" in caplog.text
|
|
assert "Can't read hosts.tmpl" in caplog.text
|
|
assert coresys.core.healthy is True
|
|
|
|
caplog.clear()
|
|
err.errno = errno.EBADMSG
|
|
await coresys.plugins.dns.load()
|
|
|
|
assert "Can't read resolve.tmpl" in caplog.text
|
|
assert "Can't read hosts.tmpl" in caplog.text
|
|
assert coresys.core.healthy is False
|
|
|
|
|
|
async def test_load_error_writing_resolv(
|
|
coresys: CoreSys, caplog: pytest.LogCaptureFixture, container
|
|
):
|
|
"""Test error writing resolv during load."""
|
|
with patch(
|
|
"supervisor.plugins.dns.Path.write_text", side_effect=(err := OSError())
|
|
):
|
|
err.errno = errno.EBUSY
|
|
await coresys.plugins.dns.load()
|
|
|
|
assert "Can't write/update /etc/resolv.conf" in caplog.text
|
|
assert coresys.core.healthy is True
|
|
|
|
caplog.clear()
|
|
err.errno = errno.EBADMSG
|
|
await coresys.plugins.dns.load()
|
|
|
|
assert "Can't write/update /etc/resolv.conf" in caplog.text
|
|
assert coresys.core.healthy is False
|
|
|
|
|
|
async def test_notify_locals_changed_end_to_end_with_changes_and_running(
|
|
coresys: CoreSys, mock_call_later
|
|
):
|
|
"""Test notify_locals_changed end-to-end: local DNS changes detected and plugin restarted."""
|
|
dns_plugin = coresys.plugins.dns
|
|
|
|
# Set cached locals to something different from current network state
|
|
current_locals = dns_plugin._compute_locals()
|
|
dns_plugin._cached_locals = (
|
|
["dns://192.168.1.1"]
|
|
if current_locals != ["dns://192.168.1.1"]
|
|
else ["dns://192.168.1.2"]
|
|
)
|
|
|
|
with (
|
|
patch.object(dns_plugin, "restart") as mock_restart,
|
|
patch.object(dns_plugin.instance, "is_running", return_value=True),
|
|
patch.object(dns_plugin, "sys_call_later", new=mock_call_later),
|
|
):
|
|
# Call notify_locals_changed
|
|
dns_plugin.notify_locals_changed()
|
|
|
|
# Wait for the async task to complete
|
|
await asyncio.sleep(0.1)
|
|
|
|
# Verify restart was called and cached locals were updated
|
|
mock_restart.assert_called_once()
|
|
assert dns_plugin._cached_locals == current_locals
|
|
|
|
|
|
async def test_notify_locals_changed_end_to_end_with_changes_but_not_running(
|
|
coresys: CoreSys, mock_call_later
|
|
):
|
|
"""Test notify_locals_changed end-to-end: local DNS changes detected but plugin not running."""
|
|
dns_plugin = coresys.plugins.dns
|
|
|
|
# Set cached locals to something different from current network state
|
|
current_locals = dns_plugin._compute_locals()
|
|
dns_plugin._cached_locals = (
|
|
["dns://192.168.1.1"]
|
|
if current_locals != ["dns://192.168.1.1"]
|
|
else ["dns://192.168.1.2"]
|
|
)
|
|
|
|
with (
|
|
patch.object(dns_plugin, "restart") as mock_restart,
|
|
patch.object(dns_plugin.instance, "is_running", return_value=False),
|
|
patch.object(dns_plugin, "sys_call_later", new=mock_call_later),
|
|
):
|
|
# Call notify_locals_changed
|
|
dns_plugin.notify_locals_changed()
|
|
|
|
# Wait for the async task to complete
|
|
await asyncio.sleep(0.1)
|
|
|
|
# Verify restart was NOT called but cached locals were still updated
|
|
mock_restart.assert_not_called()
|
|
assert dns_plugin._cached_locals == current_locals
|
|
|
|
|
|
async def test_notify_locals_changed_end_to_end_no_changes(
|
|
coresys: CoreSys, mock_call_later
|
|
):
|
|
"""Test notify_locals_changed end-to-end: no local DNS changes detected."""
|
|
dns_plugin = coresys.plugins.dns
|
|
|
|
# Set cached locals to match current network state
|
|
current_locals = dns_plugin._compute_locals()
|
|
dns_plugin._cached_locals = current_locals
|
|
|
|
with (
|
|
patch.object(dns_plugin, "restart") as mock_restart,
|
|
patch.object(dns_plugin, "sys_call_later", new=mock_call_later),
|
|
):
|
|
# Call notify_locals_changed
|
|
dns_plugin.notify_locals_changed()
|
|
|
|
# Wait for the async task to complete
|
|
await asyncio.sleep(0.1)
|
|
|
|
# Verify restart was NOT called since no changes
|
|
mock_restart.assert_not_called()
|
|
assert dns_plugin._cached_locals == current_locals
|
|
|
|
|
|
async def test_notify_locals_changed_debouncing_cancels_previous_timer(
|
|
coresys: CoreSys,
|
|
):
|
|
"""Test notify_locals_changed debouncing cancels previous timer before creating new one."""
|
|
dns_plugin = coresys.plugins.dns
|
|
|
|
# Set cached locals to trigger change detection
|
|
current_locals = dns_plugin._compute_locals()
|
|
dns_plugin._cached_locals = (
|
|
["dns://192.168.1.1"]
|
|
if current_locals != ["dns://192.168.1.1"]
|
|
else ["dns://192.168.1.2"]
|
|
)
|
|
|
|
call_count = 0
|
|
handles = []
|
|
|
|
def mock_call_later_with_tracking(_delay, *args, **kwargs) -> asyncio.TimerHandle:
|
|
"""Mock to remove delay and track calls."""
|
|
nonlocal call_count
|
|
call_count += 1
|
|
handle = coresys.call_later(0, *args, **kwargs)
|
|
handles.append(handle)
|
|
return handle
|
|
|
|
with (
|
|
patch.object(dns_plugin, "restart") as mock_restart,
|
|
patch.object(dns_plugin.instance, "is_running", return_value=True),
|
|
patch.object(dns_plugin, "sys_call_later", new=mock_call_later_with_tracking),
|
|
):
|
|
# First call sets up timer
|
|
dns_plugin.notify_locals_changed()
|
|
assert call_count == 1
|
|
first_handle = dns_plugin._locals_changed_handle
|
|
assert first_handle is not None
|
|
|
|
# Second call should cancel first timer and create new one
|
|
dns_plugin.notify_locals_changed()
|
|
assert call_count == 2
|
|
second_handle = dns_plugin._locals_changed_handle
|
|
assert second_handle is not None
|
|
assert first_handle != second_handle
|
|
|
|
# Wait for the async task to complete
|
|
await asyncio.sleep(0.1)
|
|
|
|
# Verify restart was called once for the final timer
|
|
mock_restart.assert_called_once()
|
|
assert dns_plugin._cached_locals == current_locals
|
|
|
|
|
|
async def test_stop_cancels_pending_timers_and_tasks(coresys: CoreSys):
|
|
"""Test stop cancels pending locals change timers and restart tasks to prevent resource leaks."""
|
|
dns_plugin = coresys.plugins.dns
|
|
|
|
mock_timer_handle = Mock()
|
|
mock_task_handle = Mock()
|
|
dns_plugin._locals_changed_handle = mock_timer_handle
|
|
dns_plugin._restart_after_locals_change_handle = mock_task_handle
|
|
|
|
with patch.object(dns_plugin.instance, "stop"):
|
|
await dns_plugin.stop()
|
|
|
|
# Should cancel pending timer and task, then clean up
|
|
mock_timer_handle.cancel.assert_called_once()
|
|
mock_task_handle.cancel.assert_called_once()
|
|
assert dns_plugin._locals_changed_handle is None
|
|
assert dns_plugin._restart_after_locals_change_handle is None
|
|
|
|
|
|
async def test_dns_restart_triggers_connectivity_check(coresys: CoreSys):
|
|
"""Test end-to-end that DNS container restart triggers connectivity check."""
|
|
dns_plugin = coresys.plugins.dns
|
|
|
|
# Load the plugin to register the event listener
|
|
with (
|
|
patch.object(type(dns_plugin.instance), "attach"),
|
|
patch.object(type(dns_plugin.instance), "is_running", return_value=True),
|
|
):
|
|
await dns_plugin.load()
|
|
|
|
# Verify listener was registered (connectivity check listener should be stored)
|
|
assert dns_plugin._connectivity_check_listener is not None
|
|
|
|
# Create event to signal when connectivity check is requested
|
|
connectivity_check_event = asyncio.Event()
|
|
|
|
# Mock the fire-and-forget request to set the event when called
|
|
def mock_request_connectivity_check(*, force: bool = False):
|
|
connectivity_check_event.set()
|
|
|
|
with (
|
|
patch.object(
|
|
coresys.supervisor,
|
|
"request_connectivity_check",
|
|
side_effect=mock_request_connectivity_check,
|
|
),
|
|
patch("supervisor.plugins.dns.asyncio.sleep") as mock_sleep,
|
|
):
|
|
# Fire the DNS container state change event through bus system
|
|
coresys.bus.fire_event(
|
|
BusEvent.DOCKER_CONTAINER_STATE_CHANGE,
|
|
DockerContainerStateEvent(
|
|
name="hassio_dns",
|
|
state=ContainerState.RUNNING,
|
|
id="test_id",
|
|
time=1234567890,
|
|
),
|
|
)
|
|
|
|
# Wait for connectivity check to be called
|
|
await asyncio.wait_for(connectivity_check_event.wait(), timeout=1.0)
|
|
|
|
# Verify sleep was called with correct delay
|
|
mock_sleep.assert_called_once_with(5)
|
|
|
|
# Reset and test that other containers don't trigger check
|
|
connectivity_check_event.clear()
|
|
mock_sleep.reset_mock()
|
|
|
|
# Fire event for different container
|
|
coresys.bus.fire_event(
|
|
BusEvent.DOCKER_CONTAINER_STATE_CHANGE,
|
|
DockerContainerStateEvent(
|
|
name="hassio_homeassistant",
|
|
state=ContainerState.RUNNING,
|
|
id="test_id",
|
|
time=1234567890,
|
|
),
|
|
)
|
|
|
|
# Wait a bit and verify connectivity check was NOT triggered
|
|
with pytest.raises(TimeoutError):
|
|
await asyncio.wait_for(connectivity_check_event.wait(), timeout=0.1)
|
|
|
|
# Verify sleep was not called for other containers
|
|
mock_sleep.assert_not_called()
|