From f41a8e9d082fe6a5782612a57fbb5c9f78795e7b Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Mon, 16 Mar 2026 12:46:29 +0100 Subject: [PATCH] Wait for addon startup task before unload to prevent data access race (#6630) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Wait for addon startup task before unload to prevent data access race Replace the cancel-based approach in unload() with an await of the outer _wait_for_startup_task. The container removal and state change resolve the startup event naturally, so we just need to ensure the task completes before addon data is removed. This prevents a KeyError on self.name access when _wait_for_startup times out after data has been removed. Also simplify _wait_for_startup by removing the unnecessary inner task wrapper — asyncio.wait_for can await the event directly. Co-Authored-By: Claude Opus 4.6 * Drop asyncio.sleep() in test_manager.py * Only clear startup task reference if still the current task Prevent a race where an older _wait_for_startup task's finally block could wipe the reference to a newer task, causing unload() to skip the await. Co-Authored-By: Claude Opus 4.6 * Reuse existing pending startup wait task when addon is already running If start() is called while the addon is already running and a startup wait task is still pending, return the existing task instead of creating a new one. Co-Authored-By: Claude Opus 4.6 --------- Co-authored-by: Claude Opus 4.6 --- supervisor/addons/addon.py | 27 ++++++++++++++++----------- tests/addons/test_manager.py | 9 +++------ 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/supervisor/addons/addon.py b/supervisor/addons/addon.py index 1a56911fe..5b2557969 100644 --- a/supervisor/addons/addon.py +++ b/supervisor/addons/addon.py @@ -151,7 +151,7 @@ class Addon(AddonModel): self._manual_stop: bool = False self._listeners: list[EventListener] = [] self._startup_event = asyncio.Event() - self._startup_task: asyncio.Task | None = None + self._wait_for_startup_task: asyncio.Task | None = None self._boot_failed_issue = Issue( IssueType.BOOT_FAIL, ContextType.ADDON, reference=self.slug ) @@ -760,11 +760,11 @@ class Addon(AddonModel): ) async def unload(self) -> None: """Unload add-on and remove data.""" - if self._startup_task: - # If we were waiting on startup, cancel that and let the task finish before proceeding - self._startup_task.cancel(f"Removing add-on {self.name} from system") - with suppress(asyncio.CancelledError): - await self._startup_task + # Wait for startup wait task to complete before removing data. + # The container remove/state change resolves _startup_event; this + # ensures _wait_for_startup finishes before we touch addon data. + if self._wait_for_startup_task: + await self._wait_for_startup_task for listener in self._listeners: self.sys_bus.remove_listener(listener) @@ -1109,8 +1109,7 @@ class Addon(AddonModel): async def _wait_for_startup(self) -> None: """Wait for startup event to be set with timeout.""" try: - self._startup_task = self.sys_create_task(self._startup_event.wait()) - await asyncio.wait_for(self._startup_task, STARTUP_TIMEOUT) + await asyncio.wait_for(self._startup_event.wait(), STARTUP_TIMEOUT) except TimeoutError: _LOGGER.warning( "Timeout while waiting for addon %s to start, took more than %s seconds", @@ -1120,7 +1119,8 @@ class Addon(AddonModel): except asyncio.CancelledError as err: _LOGGER.info("Wait for addon startup task cancelled due to: %s", err) finally: - self._startup_task = None + if self._wait_for_startup_task is asyncio.current_task(): + self._wait_for_startup_task = None @Job( name="addon_start", @@ -1136,7 +1136,11 @@ class Addon(AddonModel): """ if await self.instance.is_running(): _LOGGER.warning("%s is already running!", self.slug) - return self.sys_create_task(self._wait_for_startup()) + if not self._wait_for_startup_task or self._wait_for_startup_task.done(): + self._wait_for_startup_task = self.sys_create_task( + self._wait_for_startup() + ) + return self._wait_for_startup_task # Access Token self.persist[ATTR_ACCESS_TOKEN] = secrets.token_hex(56) @@ -1176,7 +1180,8 @@ class Addon(AddonModel): self.state = AddonState.ERROR raise AddonUnknownError(addon=self.slug) from err - return self.sys_create_task(self._wait_for_startup()) + self._wait_for_startup_task = self.sys_create_task(self._wait_for_startup()) + return self._wait_for_startup_task @Job( name="addon_stop", diff --git a/tests/addons/test_manager.py b/tests/addons/test_manager.py index 00bc38b99..8c1c832c9 100644 --- a/tests/addons/test_manager.py +++ b/tests/addons/test_manager.py @@ -333,13 +333,12 @@ async def test_rebuild( @pytest.mark.usefixtures("tmp_supervisor_data", "path_extern") -async def test_start_wait_cancel_on_uninstall( +async def test_start_wait_resolved_on_uninstall_in_startup( coresys: CoreSys, install_addon_ssh: Addon, container: DockerContainer, - caplog: pytest.LogCaptureFixture, ) -> None: - """Test the addon wait task is cancelled when addon is uninstalled.""" + """Test uninstall resolves the startup wait task when addon is in STARTUP state.""" install_addon_ssh.path_data.mkdir() container.show.return_value["Config"] = {"Healthcheck": "exists"} await install_addon_ssh.load() @@ -363,11 +362,9 @@ async def test_start_wait_cancel_on_uninstall( assert not start_task.done() assert install_addon_ssh.state == AddonState.STARTUP - caplog.clear() await coresys.addons.uninstall(TEST_ADDON_SLUG) - await asyncio.sleep(0.01) assert start_task.done() - assert "Wait for addon startup task cancelled" in caplog.text + assert start_task.exception() is None async def test_repository_file_missing(