mirror of
https://github.com/home-assistant/supervisor.git
synced 2025-12-24 20:35:55 +00:00
Improve progress reporting when layers are locally present
With containerd snapshotter, some layers skip the "Downloading" phase
entirely and go directly from "Pulling fs layer" to "Download complete".
These layers only receive a placeholder extra={current: 1, total: 1}
instead of actual size information.
The previous fix in #6357 still required ALL layers to have extra before
calculating progress, which caused the UI to jump from 0% to 70%+ when
the first real layer reported after cached layers completed.
This change improves progress calculation by:
- Separating "real" layers (with actual size > 1 byte) from "placeholder"
layers (cached/small layers with size = 1)
- Calculating weighted progress only from real layers
- Not reporting progress until at least one real layer has size info
- Correctly tracking stage when placeholder layers are still extracting
Tested with real-world pull events from a Home Assistant Core update
that had 12 layers where 5 skipped downloading entirely.
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -312,24 +312,44 @@ class DockerInterface(JobGroup, ABC):
|
||||
and job.name == "Pulling container image layer"
|
||||
]
|
||||
|
||||
# First set the total bytes to be downloaded/extracted on the main job
|
||||
if not install_job.extra:
|
||||
total = 0
|
||||
for job in layer_jobs:
|
||||
if not job.extra:
|
||||
return
|
||||
total += job.extra["total"]
|
||||
install_job.extra = {"total": total}
|
||||
else:
|
||||
total = install_job.extra["total"]
|
||||
# Calculate total from layers that have reported size info
|
||||
# With containerd snapshotter, some layers skip "Downloading" and go directly to
|
||||
# "Download complete", so we can't wait for all layers to have extra before reporting progress
|
||||
layers_with_extra = [
|
||||
job for job in layer_jobs if job.extra and job.extra.get("total")
|
||||
]
|
||||
if not layers_with_extra:
|
||||
return
|
||||
|
||||
# Then determine total progress based on progress of each sub-job, factoring in size of each compared to total
|
||||
# Sum up total bytes. Layers that skip downloading get placeholder extra={1,1}
|
||||
# which doesn't represent actual size. Separate "real" layers from placeholders.
|
||||
# Filter guarantees job.extra is not None and has "total" key
|
||||
real_layers = [
|
||||
job for job in layers_with_extra if cast(dict, job.extra)["total"] > 1
|
||||
]
|
||||
placeholder_layers = [
|
||||
job for job in layers_with_extra if cast(dict, job.extra)["total"] == 1
|
||||
]
|
||||
|
||||
# If we only have placeholder layers (no real size info yet), don't report progress
|
||||
# This prevents tiny cached layers from showing inflated progress before
|
||||
# the actual download sizes are known
|
||||
if not real_layers:
|
||||
return
|
||||
|
||||
total = sum(cast(dict, job.extra)["total"] for job in real_layers)
|
||||
if total == 0:
|
||||
return
|
||||
|
||||
# Update install_job.extra with current total (may increase as more layers report)
|
||||
install_job.extra = {"total": total}
|
||||
|
||||
# Calculate progress based on layers that have real size info
|
||||
# Placeholder layers (skipped downloads) count as complete but don't affect weighted progress
|
||||
progress = 0.0
|
||||
stage = PullImageLayerStage.PULL_COMPLETE
|
||||
for job in layer_jobs:
|
||||
if not job.extra or not job.extra.get("total"):
|
||||
return
|
||||
progress += job.progress * (job.extra["total"] / total)
|
||||
for job in real_layers:
|
||||
progress += job.progress * (cast(dict, job.extra)["total"] / total)
|
||||
job_stage = PullImageLayerStage.from_status(cast(str, job.stage))
|
||||
|
||||
if job_stage < PullImageLayerStage.EXTRACTING:
|
||||
@@ -340,6 +360,21 @@ class DockerInterface(JobGroup, ABC):
|
||||
):
|
||||
stage = PullImageLayerStage.EXTRACTING
|
||||
|
||||
# Check if any layers are still pending (no extra yet)
|
||||
# If so, we're still in downloading phase even if all layers_with_extra are done
|
||||
layers_pending = len(layer_jobs) - len(layers_with_extra)
|
||||
if layers_pending > 0 and stage == PullImageLayerStage.PULL_COMPLETE:
|
||||
stage = PullImageLayerStage.DOWNLOADING
|
||||
|
||||
# Also check if all placeholders are done but we're waiting for real layers
|
||||
if placeholder_layers and stage == PullImageLayerStage.PULL_COMPLETE:
|
||||
# All real layers are done, but check if placeholders are still extracting
|
||||
for job in placeholder_layers:
|
||||
job_stage = PullImageLayerStage.from_status(cast(str, job.stage))
|
||||
if job_stage < PullImageLayerStage.PULL_COMPLETE:
|
||||
stage = PullImageLayerStage.EXTRACTING
|
||||
break
|
||||
|
||||
# Ensure progress is 100 at this point to prevent float drift
|
||||
if stage == PullImageLayerStage.PULL_COMPLETE:
|
||||
progress = 100
|
||||
|
||||
@@ -709,11 +709,13 @@ async def test_install_progress_handles_layers_skipping_download(
|
||||
await install_task
|
||||
await event.wait()
|
||||
|
||||
# First update from layer download should have rather low progress ((260937/25445459) / 2 ~ 0.5%)
|
||||
assert install_job_snapshots[0]["progress"] < 1
|
||||
# First update from layer download should have rather low progress ((260937/25371463) ~= 1%)
|
||||
assert install_job_snapshots[0]["progress"] < 2
|
||||
|
||||
# Total 8 events should lead to a progress update on the install job
|
||||
assert len(install_job_snapshots) == 8
|
||||
# Total 7 events should lead to a progress update on the install job:
|
||||
# 3 Downloading events + Download complete (70%) + Extracting + Pull complete (100%) + stage change
|
||||
# Note: The small placeholder layer ({1,1}) is excluded from progress calculation
|
||||
assert len(install_job_snapshots) == 7
|
||||
|
||||
# Job should complete successfully
|
||||
assert job.done is True
|
||||
@@ -865,3 +867,76 @@ async def test_install_progress_containerd_snapshot(
|
||||
job_event(100),
|
||||
job_event(100, True),
|
||||
]
|
||||
|
||||
|
||||
async def test_install_progress_containerd_snapshotter_real_world(
|
||||
coresys: CoreSys, ha_ws_client: AsyncMock
|
||||
):
|
||||
"""Test install handles real-world containerd snapshotter events.
|
||||
|
||||
This test uses real pull events captured from a Home Assistant Core update
|
||||
where some layers skip the Downloading phase entirely (going directly from
|
||||
"Pulling fs layer" to "Download complete"). This causes the bug where progress
|
||||
jumps from 0 to 100 without intermediate updates.
|
||||
|
||||
Root cause: _update_install_job_status() returns early if ANY layer has
|
||||
extra=None. Layers that skip Downloading don't get extra until Download complete,
|
||||
so progress cannot be calculated until ALL layers reach Download complete.
|
||||
"""
|
||||
coresys.core.set_state(CoreState.RUNNING)
|
||||
|
||||
class TestDockerInterface(DockerInterface):
|
||||
"""Test interface for events."""
|
||||
|
||||
@property
|
||||
def name(self) -> str:
|
||||
"""Name of test interface."""
|
||||
return "test_interface"
|
||||
|
||||
@Job(
|
||||
name="mock_docker_interface_install_realworld",
|
||||
child_job_syncs=[
|
||||
ChildJobSyncFilter("docker_interface_install", progress_allocation=1.0)
|
||||
],
|
||||
)
|
||||
async def mock_install(self) -> None:
|
||||
"""Mock install."""
|
||||
await super().install(
|
||||
AwesomeVersion("1.2.3"), image="test", arch=CpuArch.I386
|
||||
)
|
||||
|
||||
# Real-world fixture: 12 layers, 262 Downloading events
|
||||
# Some layers skip Downloading entirely (small layers with containerd snapshotter)
|
||||
logs = load_json_fixture("docker_pull_image_log_containerd_snapshotter_real.json")
|
||||
coresys.docker.images.pull.return_value = AsyncIterator(logs)
|
||||
test_docker_interface = TestDockerInterface(coresys)
|
||||
|
||||
with patch.object(Supervisor, "arch", PropertyMock(return_value="i386")):
|
||||
await test_docker_interface.mock_install()
|
||||
|
||||
await asyncio.sleep(1)
|
||||
|
||||
# Get progress events for the parent job (what UI sees)
|
||||
job_events = [
|
||||
c.args[0]
|
||||
for c in ha_ws_client.async_send_command.call_args_list
|
||||
if c.args[0].get("data", {}).get("event") == WSEvent.JOB
|
||||
and c.args[0].get("data", {}).get("data", {}).get("name")
|
||||
== "mock_docker_interface_install_realworld"
|
||||
]
|
||||
progress_values = [e["data"]["data"]["progress"] for e in job_events]
|
||||
|
||||
# We should have intermediate progress updates, not just 0 and 100
|
||||
assert len(progress_values) > 3, (
|
||||
f"BUG: Progress jumped 0->100 without intermediate updates. "
|
||||
f"Got {len(progress_values)} updates: {progress_values}. "
|
||||
f"Expected intermediate progress during the 262 Downloading events."
|
||||
)
|
||||
|
||||
# Progress should be monotonically increasing
|
||||
for i in range(1, len(progress_values)):
|
||||
assert progress_values[i] >= progress_values[i - 1]
|
||||
|
||||
# Should see progress in downloading phase (0-70%)
|
||||
downloading_progress = [p for p in progress_values if 0 < p < 70]
|
||||
assert len(downloading_progress) > 0
|
||||
|
||||
Reference in New Issue
Block a user