Fix progress when using containerd snapshotter (#6357)

* Fix progress when using containerd snapshotter * Add test for tiny image download under containerd-snapshotter * Fix API tests after progress allocation change * Fix test for auth changes * Apply suggestions from code review Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --------- Co-authored-by: Stefan Agner <stefan@agner.ch> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
2025-12-20 02:18:59 +00:00 · 2025-11-27 10:26:22 -05:00
parent f55fd891e9
commit 6302c7d394
7 changed files with 345 additions and 45 deletions
--- a/supervisor/docker/interface.py
+++ b/supervisor/docker/interface.py
@@ -226,28 +226,16 @@ class DockerInterface(JobGroup, ABC):
                job = j
                break

-        # This likely only occurs if the logs came in out of sync and we got progress before the Pulling FS Layer one
+        # There should no longer be any real risk of logs out of order anymore.
+        # However tests with very small images have shown that sometimes Docker
+        # skips stages in log. So keeping this one as a safety check on null job
        if not job:
            raise DockerLogOutOfOrder(
                f"Received pull image log with status {reference.status} for image id {reference.id} and parent job {install_job_id} but could not find a matching job, skipping",
                _LOGGER.debug,
            )

-        # Hopefully these come in order but if they sometimes get out of sync, avoid accidentally going backwards
-        # If it happens a lot though we may need to reconsider the value of this feature
-        if job.done:
-            raise DockerLogOutOfOrder(
-                f"Received pull image log with status {reference.status} for job {job.uuid} but job was done, skipping",
-                _LOGGER.debug,
-            )
-
-        if job.stage and stage < PullImageLayerStage.from_status(job.stage):
-            raise DockerLogOutOfOrder(
-                f"Received pull image log with status {reference.status} for job {job.uuid} but job was already on stage {job.stage}, skipping",
-                _LOGGER.debug,
-            )
-
-        # For progress calcuation we assume downloading and extracting are each 50% of the time and others stages negligible
+        # For progress calculation we assume downloading is 70% of time, extracting is 30% and others stages negligible
        progress = job.progress
        match stage:
            case PullImageLayerStage.DOWNLOADING | PullImageLayerStage.EXTRACTING:
@@ -256,22 +244,26 @@ class DockerInterface(JobGroup, ABC):
                    and reference.progress_detail.current
                    and reference.progress_detail.total
                ):
-                    progress = 50 * (
+                    progress = (
                        reference.progress_detail.current
                        / reference.progress_detail.total
                    )
-                    if stage == PullImageLayerStage.EXTRACTING:
-                        progress += 50
+                    if stage == PullImageLayerStage.DOWNLOADING:
+                        progress = 70 * progress
+                    else:
+                        progress = 70 + 30 * progress
            case (
                PullImageLayerStage.VERIFYING_CHECKSUM
                | PullImageLayerStage.DOWNLOAD_COMPLETE
            ):
-                progress = 50
+                progress = 70
            case PullImageLayerStage.PULL_COMPLETE:
                progress = 100
            case PullImageLayerStage.RETRYING_DOWNLOAD:
                progress = 0

+        # No real risk of getting things out of order in current implementation
+        # but keeping this one in case another change to these trips us up.
        if stage != PullImageLayerStage.RETRYING_DOWNLOAD and progress < job.progress:
            raise DockerLogOutOfOrder(
                f"Received pull image log with status {reference.status} for job {job.uuid} that implied progress was {progress} but current progress is {job.progress}, skipping",
@@ -335,7 +327,7 @@ class DockerInterface(JobGroup, ABC):
        progress = 0.0
        stage = PullImageLayerStage.PULL_COMPLETE
        for job in layer_jobs:
-            if not job.extra:
+            if not job.extra or not job.extra.get("total"):
                return
            progress += job.progress * (job.extra["total"] / total)
            job_stage = PullImageLayerStage.from_status(cast(str, job.stage))
--- a/supervisor/docker/manager.py
+++ b/supervisor/docker/manager.py
@@ -111,10 +111,15 @@ class PullProgressDetail:
    """Progress detail information for pull.

    Documentation lacking but both of these seem to be in bytes when populated.
+
+    Containerd-snapshot update - When leveraging this new feature, this information
+    becomes useless to us while extracting. It simply tells elapsed time using
+    current and units.
    """

    current: int | None = None
    total: int | None = None
+    units: str | None = None

    @classmethod
    def from_pull_log_dict(cls, value: dict[str, int]) -> PullProgressDetail:
--- a/tests/api/test_homeassistant.py
+++ b/tests/api/test_homeassistant.py
@@ -323,29 +323,29 @@ async def test_api_progress_updates_home_assistant_update(
        },
        {
            "stage": None,
-            "progress": 1.2,
+            "progress": 1.7,
            "done": False,
        },
        {
            "stage": None,
-            "progress": 2.8,
+            "progress": 4.0,
            "done": False,
        },
    ]
    assert events[-5:] == [
        {
            "stage": None,
-            "progress": 97.2,
+            "progress": 98.2,
            "done": False,
        },
        {
            "stage": None,
-            "progress": 98.4,
+            "progress": 98.3,
            "done": False,
        },
        {
            "stage": None,
-            "progress": 99.4,
+            "progress": 99.3,
            "done": False,
        },
        {
--- a/tests/api/test_store.py
+++ b/tests/api/test_store.py
@@ -773,29 +773,29 @@ async def test_api_progress_updates_addon_install_update(
        },
        {
            "stage": None,
-            "progress": 1.2,
+            "progress": 1.7,
            "done": False,
        },
        {
            "stage": None,
-            "progress": 2.8,
+            "progress": 4.0,
            "done": False,
        },
    ]
    assert events[-5:] == [
        {
            "stage": None,
-            "progress": 97.2,
+            "progress": 98.2,
            "done": False,
        },
        {
            "stage": None,
-            "progress": 98.4,
+            "progress": 98.3,
            "done": False,
        },
        {
            "stage": None,
-            "progress": 99.4,
+            "progress": 99.3,
            "done": False,
        },
        {
--- a/tests/api/test_supervisor.py
+++ b/tests/api/test_supervisor.py
@@ -371,29 +371,29 @@ async def test_api_progress_updates_supervisor_update(
        },
        {
            "stage": None,
-            "progress": 1.2,
+            "progress": 1.7,
            "done": False,
        },
        {
            "stage": None,
-            "progress": 2.8,
+            "progress": 4.0,
            "done": False,
        },
    ]
    assert events[-5:] == [
        {
            "stage": None,
-            "progress": 97.2,
+            "progress": 98.2,
            "done": False,
        },
        {
            "stage": None,
-            "progress": 98.4,
+            "progress": 98.3,
            "done": False,
        },
        {
            "stage": None,
-            "progress": 99.4,
+            "progress": 99.3,
            "done": False,
        },
        {
--- a/tests/docker/test_interface.py
+++ b/tests/docker/test_interface.py
@@ -26,7 +26,10 @@ from supervisor.exceptions import (
    DockerNotFound,
    DockerRequestError,
 )
-from supervisor.jobs import JobSchedulerOptions, SupervisorJob
+from supervisor.homeassistant.const import WSEvent, WSType
+from supervisor.jobs import ChildJobSyncFilter, JobSchedulerOptions, SupervisorJob
+from supervisor.jobs.decorator import Job
+from supervisor.supervisor import Supervisor

 from tests.common import AsyncIterator, load_json_fixture

@@ -314,7 +317,7 @@ async def test_install_fires_progress_events(
        },
        {"status": "Already exists", "progressDetail": {}, "id": "6e771e15690e"},
        {"status": "Pulling fs layer", "progressDetail": {}, "id": "1578b14a573c"},
-        {"status": "Waiting", "progressDetail": {}, "id": "2488d0e401e1"},
+        {"status": "Waiting", "progressDetail": {}, "id": "1578b14a573c"},
        {
            "status": "Downloading",
            "progressDetail": {"current": 1378, "total": 1486},
@@ -384,7 +387,7 @@ async def test_install_fires_progress_events(
            job_id=ANY,
            status="Waiting",
            progress_detail=PullProgressDetail(),
-            id="2488d0e401e1",
+            id="1578b14a573c",
        ),
        PullLogEntry(
            job_id=ANY,
@@ -538,6 +541,7 @@ async def test_install_raises_on_pull_error(
            "status": "Pulling from home-assistant/odroid-n2-homeassistant",
            "id": "2025.7.2",
        },
+        {"status": "Pulling fs layer", "progressDetail": {}, "id": "1578b14a573c"},
        {
            "status": "Downloading",
            "progressDetail": {"current": 1378, "total": 1486},
@@ -592,16 +596,39 @@ async def test_install_progress_handles_download_restart(
    capture_exception.assert_not_called()


+@pytest.mark.parametrize(
+    "extract_log",
+    [
+        {
+            "status": "Extracting",
+            "progressDetail": {"current": 96, "total": 96},
+            "progress": "[==================================================>]      96B/96B",
+            "id": "02a6e69d8d00",
+        },
+        {
+            "status": "Extracting",
+            "progressDetail": {"current": 1, "units": "s"},
+            "progress": "1 s",
+            "id": "02a6e69d8d00",
+        },
+    ],
+    ids=["normal_extract_log", "containerd_snapshot_extract_log"],
+)
 async def test_install_progress_handles_layers_skipping_download(
    coresys: CoreSys,
    test_docker_interface: DockerInterface,
    capture_exception: Mock,
+    extract_log: dict[str, Any],
 ):
    """Test install handles small layers that skip downloading phase and go directly to download complete.

    Reproduces the real-world scenario from Supervisor issue #6286:
    - Small layer (02a6e69d8d00) completes Download complete at 10:14:08 without ever Downloading
    - Normal layer (3f4a84073184) starts Downloading at 10:14:09 with progress updates
+
+    Under containerd snapshotter this presumably can still occur and Supervisor will have even less info
+    since extract logs don't have a total. Supervisor should generally just ignore these and set progress
+    from the larger images that take all the time.
    """
    coresys.core.set_state(CoreState.RUNNING)

@@ -645,12 +672,7 @@ async def test_install_progress_handles_layers_skipping_download(
        },
        {"status": "Pull complete", "progressDetail": {}, "id": "3f4a84073184"},
        # Small layer finally extracts (10:14:58 in logs)
-        {
-            "status": "Extracting",
-            "progressDetail": {"current": 96, "total": 96},
-            "progress": "[==================================================>]      96B/96B",
-            "id": "02a6e69d8d00",
-        },
+        extract_log,
        {"status": "Pull complete", "progressDetail": {}, "id": "02a6e69d8d00"},
        {"status": "Digest: sha256:test"},
        {"status": "Status: Downloaded newer image for test/image:latest"},
@@ -758,3 +780,88 @@ async def test_missing_total_handled_gracefully(
    await event.wait()

    capture_exception.assert_not_called()
+
+
+async def test_install_progress_containerd_snapshot(
+    coresys: CoreSys, ha_ws_client: AsyncMock
+):
+    """Test install handles docker progress events using containerd snapshotter."""
+    coresys.core.set_state(CoreState.RUNNING)
+
+    class TestDockerInterface(DockerInterface):
+        """Test interface for events."""
+
+        @property
+        def name(self) -> str:
+            """Name of test interface."""
+            return "test_interface"
+
+        @Job(
+            name="mock_docker_interface_install",
+            child_job_syncs=[
+                ChildJobSyncFilter("docker_interface_install", progress_allocation=1.0)
+            ],
+        )
+        async def mock_install(self) -> None:
+            """Mock install."""
+            await super().install(
+                AwesomeVersion("1.2.3"), image="test", arch=CpuArch.I386
+            )
+
+    # Fixture emulates log as received when using containerd snapshotter
+    # Should not error but progress gets choppier once extraction starts
+    logs = load_json_fixture("docker_pull_image_log_containerd_snapshot.json")
+    coresys.docker.images.pull.return_value = AsyncIterator(logs)
+    test_docker_interface = TestDockerInterface(coresys)
+
+    with patch.object(Supervisor, "arch", PropertyMock(return_value="i386")):
+        await test_docker_interface.mock_install()
+        coresys.docker.images.pull.assert_called_once_with(
+            "test", tag="1.2.3", platform="linux/386", auth=None, stream=True
+        )
+        coresys.docker.images.inspect.assert_called_once_with("test:1.2.3")
+
+    await asyncio.sleep(1)
+
+    def job_event(progress: float, done: bool = False):
+        return {
+            "type": WSType.SUPERVISOR_EVENT,
+            "data": {
+                "event": WSEvent.JOB,
+                "data": {
+                    "name": "mock_docker_interface_install",
+                    "reference": "test_interface",
+                    "uuid": ANY,
+                    "progress": progress,
+                    "stage": None,
+                    "done": done,
+                    "parent_id": None,
+                    "errors": [],
+                    "created": ANY,
+                    "extra": None,
+                },
+            },
+        }
+
+    assert [c.args[0] for c in ha_ws_client.async_send_command.call_args_list] == [
+        # During downloading we get continuous progress updates from download status
+        job_event(0),
+        job_event(3.4),
+        job_event(8.5),
+        job_event(10.2),
+        job_event(15.3),
+        job_event(18.8),
+        job_event(29.0),
+        job_event(35.8),
+        job_event(42.6),
+        job_event(49.5),
+        job_event(56.0),
+        job_event(62.8),
+        # Downloading phase is considered 70% of total. After we only get one update
+        # per image downloaded when extraction is finished. It uses the total size
+        # received during downloading to determine percent complete then.
+        job_event(70.0),
+        job_event(84.8),
+        job_event(100),
+        job_event(100, True),
+    ]
--- a/tests/fixtures/docker_pull_image_log_containerd_snapshot.json
+++ b/tests/fixtures/docker_pull_image_log_containerd_snapshot.json
@@ -0,0 +1,196 @@
+[
+  {
+    "status": "Pulling from home-assistant/home-assistant",
+    "id": "2025.12.0.dev202511080235"
+  },
+  { "status": "Pulling fs layer", "progressDetail": {}, "id": "eafecc6b43cc" },
+  { "status": "Pulling fs layer", "progressDetail": {}, "id": "333270549f95" },
+  {
+    "status": "Downloading",
+    "progressDetail": { "current": 1048576, "total": 21863319 },
+    "progress": "[==\u003e                                                ]  1.049MB/21.86MB",
+    "id": "eafecc6b43cc"
+  },
+  {
+    "status": "Downloading",
+    "progressDetail": { "current": 1048576, "total": 21179924 },
+    "progress": "[==\u003e                                                ]  1.049MB/21.18MB",
+    "id": "333270549f95"
+  },
+  {
+    "status": "Downloading",
+    "progressDetail": { "current": 4194304, "total": 21863319 },
+    "progress": "[=========\u003e                                         ]  4.194MB/21.86MB",
+    "id": "eafecc6b43cc"
+  },
+  {
+    "status": "Downloading",
+    "progressDetail": { "current": 2097152, "total": 21179924 },
+    "progress": "[====\u003e                                              ]  2.097MB/21.18MB",
+    "id": "333270549f95"
+  },
+  {
+    "status": "Downloading",
+    "progressDetail": { "current": 7340032, "total": 21863319 },
+    "progress": "[================\u003e                                  ]   7.34MB/21.86MB",
+    "id": "eafecc6b43cc"
+  },
+  {
+    "status": "Downloading",
+    "progressDetail": { "current": 4194304, "total": 21179924 },
+    "progress": "[=========\u003e                                         ]  4.194MB/21.18MB",
+    "id": "333270549f95"
+  },
+  {
+    "status": "Downloading",
+    "progressDetail": { "current": 13631488, "total": 21863319 },
+    "progress": "[===============================\u003e                   ]  13.63MB/21.86MB",
+    "id": "eafecc6b43cc"
+  },
+  {
+    "status": "Downloading",
+    "progressDetail": { "current": 8388608, "total": 21179924 },
+    "progress": "[===================\u003e                               ]  8.389MB/21.18MB",
+    "id": "333270549f95"
+  },
+  {
+    "status": "Downloading",
+    "progressDetail": { "current": 17825792, "total": 21863319 },
+    "progress": "[========================================\u003e          ]  17.83MB/21.86MB",
+    "id": "eafecc6b43cc"
+  },
+  {
+    "status": "Downloading",
+    "progressDetail": { "current": 12582912, "total": 21179924 },
+    "progress": "[=============================\u003e                     ]  12.58MB/21.18MB",
+    "id": "333270549f95"
+  },
+  {
+    "status": "Downloading",
+    "progressDetail": { "current": 21863319, "total": 21863319 },
+    "progress": "[==================================================\u003e]  21.86MB/21.86MB",
+    "id": "eafecc6b43cc"
+  },
+  {
+    "status": "Downloading",
+    "progressDetail": { "current": 16777216, "total": 21179924 },
+    "progress": "[=======================================\u003e           ]  16.78MB/21.18MB",
+    "id": "333270549f95"
+  },
+  {
+    "status": "Downloading",
+    "progressDetail": { "current": 21179924, "total": 21179924 },
+    "progress": "[==================================================\u003e]  21.18MB/21.18MB",
+    "id": "333270549f95"
+  },
+  {
+    "status": "Download complete",
+    "progressDetail": { "hidecounts": true },
+    "id": "eafecc6b43cc"
+  },
+  {
+    "status": "Download complete",
+    "progressDetail": { "hidecounts": true },
+    "id": "333270549f95"
+  },
+  {
+    "status": "Extracting",
+    "progressDetail": { "current": 1, "units": "s" },
+    "progress": "1 s",
+    "id": "333270549f95"
+  },
+  {
+    "status": "Extracting",
+    "progressDetail": { "current": 1, "units": "s" },
+    "progress": "1 s",
+    "id": "333270549f95"
+  },
+  {
+    "status": "Pull complete",
+    "progressDetail": { "hidecounts": true },
+    "id": "333270549f95"
+  },
+  {
+    "status": "Extracting",
+    "progressDetail": { "current": 1, "units": "s" },
+    "progress": "1 s",
+    "id": "eafecc6b43cc"
+  },
+  {
+    "status": "Extracting",
+    "progressDetail": { "current": 1, "units": "s" },
+    "progress": "1 s",
+    "id": "eafecc6b43cc"
+  },
+  {
+    "status": "Extracting",
+    "progressDetail": { "current": 2, "units": "s" },
+    "progress": "2 s",
+    "id": "eafecc6b43cc"
+  },
+  {
+    "status": "Extracting",
+    "progressDetail": { "current": 2, "units": "s" },
+    "progress": "2 s",
+    "id": "eafecc6b43cc"
+  },
+  {
+    "status": "Extracting",
+    "progressDetail": { "current": 3, "units": "s" },
+    "progress": "3 s",
+    "id": "eafecc6b43cc"
+  },
+  {
+    "status": "Extracting",
+    "progressDetail": { "current": 3, "units": "s" },
+    "progress": "3 s",
+    "id": "eafecc6b43cc"
+  },
+  {
+    "status": "Extracting",
+    "progressDetail": { "current": 4, "units": "s" },
+    "progress": "4 s",
+    "id": "eafecc6b43cc"
+  },
+  {
+    "status": "Extracting",
+    "progressDetail": { "current": 4, "units": "s" },
+    "progress": "4 s",
+    "id": "eafecc6b43cc"
+  },
+  {
+    "status": "Extracting",
+    "progressDetail": { "current": 5, "units": "s" },
+    "progress": "5 s",
+    "id": "eafecc6b43cc"
+  },
+  {
+    "status": "Extracting",
+    "progressDetail": { "current": 5, "units": "s" },
+    "progress": "5 s",
+    "id": "eafecc6b43cc"
+  },
+  {
+    "status": "Extracting",
+    "progressDetail": { "current": 6, "units": "s" },
+    "progress": "6 s",
+    "id": "eafecc6b43cc"
+  },
+  {
+    "status": "Extracting",
+    "progressDetail": { "current": 6, "units": "s" },
+    "progress": "6 s",
+    "id": "eafecc6b43cc"
+  },
+  {
+    "status": "Pull complete",
+    "progressDetail": { "hidecounts": true },
+    "id": "eafecc6b43cc"
+  },
+  {
+    "status": "Digest: sha256:bfc9efc13552c0c228f3d9d35987331cce68b43c9bc79c80a57eeadadd44cccf"
+  },
+  {
+    "status": "Status: Downloaded newer image for ghcr.io/home-assistant/home-assistant:2025.12.0.dev202511080235"
+  }
+]