
Use OpenAI schema dataclasses for cloud stream responses (#161663)

victorigualada committed via GitHub on 2026-01-28 20:59:03 +01:00
commit bdbce57217, parent 8536472fe9
7 changed files with 75 additions and 73 deletions
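The commit replaces litellm's string-based `ResponsesAPIStreamEvents` comparisons with `isinstance` checks against the typed event dataclasses that `hass_nabucasa.llm` exports. The following is a minimal sketch of that dispatch style, not the integration's actual handler; it only uses names and attributes that appear in the diff below, and `stream` is assumed to be any async iterator of these events:

```python
from hass_nabucasa.llm import (
    LLMResponseCompletedEvent,
    LLMResponseOutputItemAddedEvent,
    LLMResponseOutputTextDeltaEvent,
)


async def dump_stream(stream) -> None:
    """Illustrative only: dispatch on typed event dataclasses instead of type strings."""
    async for event in stream:
        if isinstance(event, LLMResponseOutputItemAddedEvent):
            # A new output item (message, reasoning, tool call, ...) has started.
            print("new item:", type(event.item).__name__)
        elif isinstance(event, LLMResponseOutputTextDeltaEvent):
            # Incremental assistant text arrives as deltas.
            print(event.delta, end="")
        elif isinstance(event, LLMResponseCompletedEvent):
            # The response payload is dict-like; usage is optional.
            usage = (event.response or {}).get("usage", {})
            print("\ntokens:", usage.get("input_tokens"), usage.get("output_tokens"))
```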


@@ -12,14 +12,25 @@ from hass_nabucasa import Cloud, NabuCasaBaseError
from hass_nabucasa.llm import (
    LLMAuthenticationError,
    LLMRateLimitError,
+    LLMResponseCompletedEvent,
    LLMResponseError,
+    LLMResponseErrorEvent,
+    LLMResponseFailedEvent,
+    LLMResponseFunctionCallArgumentsDeltaEvent,
+    LLMResponseFunctionCallArgumentsDoneEvent,
+    LLMResponseFunctionCallOutputItem,
+    LLMResponseImageOutputItem,
+    LLMResponseIncompleteEvent,
+    LLMResponseMessageOutputItem,
+    LLMResponseOutputItemAddedEvent,
+    LLMResponseOutputItemDoneEvent,
+    LLMResponseOutputTextDeltaEvent,
+    LLMResponseReasoningOutputItem,
+    LLMResponseReasoningSummaryTextDeltaEvent,
+    LLMResponseWebSearchCallOutputItem,
+    LLMResponseWebSearchCallSearchingEvent,
    LLMServiceError,
)
-from litellm import (
-    ResponseFunctionToolCall,
-    ResponseInputParam,
-    ResponsesAPIStreamEvents,
-)
from openai.types.responses import (
    FunctionToolParam,
    ResponseInputItemParam,
@@ -60,9 +71,9 @@ class ResponseItemType(str, Enum):
def _convert_content_to_param(
    chat_content: Iterable[conversation.Content],
-) -> ResponseInputParam:
+) -> list[ResponseInputItemParam]:
    """Convert any native chat message for this agent to the native format."""
-    messages: ResponseInputParam = []
+    messages: list[ResponseInputItemParam] = []
    reasoning_summary: list[str] = []
    web_search_calls: dict[str, dict[str, Any]] = {}
@@ -238,7 +249,7 @@ async def _transform_stream( # noqa: C901 - This is complex, but better to have
    """Transform stream result into HA format."""
    last_summary_index = None
    last_role: Literal["assistant", "tool_result"] | None = None
-    current_tool_call: ResponseFunctionToolCall | None = None
+    current_tool_call: LLMResponseFunctionCallOutputItem | None = None

    # Non-reasoning models don't follow our request to remove citations, so we remove
    # them manually here. They always follow the same pattern: the citation is always
@@ -248,19 +259,10 @@ async def _transform_stream( # noqa: C901 - This is complex, but better to have
    citation_regexp = re.compile(r"\(\[([^\]]+)\]\((https?:\/\/[^\)]+)\)")

    async for event in stream:
-        event_type = getattr(event, "type", None)
-        event_item = getattr(event, "item", None)
-        event_item_type = getattr(event_item, "type", None) if event_item else None
+        _LOGGER.debug("Event[%s]", getattr(event, "type", None))

-        _LOGGER.debug(
-            "Event[%s] | item: %s",
-            event_type,
-            event_item_type,
-        )
-
-        if event_type == ResponsesAPIStreamEvents.OUTPUT_ITEM_ADDED:
-            # Detect function_call even when it's a BaseLiteLLMOpenAIResponseObject
-            if event_item_type == ResponseItemType.FUNCTION_CALL:
+        if isinstance(event, LLMResponseOutputItemAddedEvent):
+            if isinstance(event.item, LLMResponseFunctionCallOutputItem):
                # OpenAI has tool calls as individual events
                # while HA puts tool calls inside the assistant message.
                # We turn them into individual assistant content for HA
@@ -268,11 +270,11 @@ async def _transform_stream( # noqa: C901 - This is complex, but better to have
                    yield {"role": "assistant"}
                    last_role = "assistant"
                last_summary_index = None
-                current_tool_call = cast(ResponseFunctionToolCall, event.item)
+                current_tool_call = event.item
            elif (
-                event_item_type == ResponseItemType.MESSAGE
+                isinstance(event.item, LLMResponseMessageOutputItem)
                or (
-                    event_item_type == ResponseItemType.REASONING
+                    isinstance(event.item, LLMResponseReasoningOutputItem)
                    and last_summary_index is not None
                ) # Subsequent ResponseReasoningItem
                or last_role != "assistant"
@@ -281,14 +283,14 @@ async def _transform_stream( # noqa: C901 - This is complex, but better to have
                last_role = "assistant"
                last_summary_index = None
-        elif event_type == ResponsesAPIStreamEvents.OUTPUT_ITEM_DONE:
-            if event_item_type == ResponseItemType.REASONING:
-                encrypted_content = getattr(event.item, "encrypted_content", None)
-                summary = getattr(event.item, "summary", []) or []
+        elif isinstance(event, LLMResponseOutputItemDoneEvent):
+            if isinstance(event.item, LLMResponseReasoningOutputItem):
+                encrypted_content = event.item.encrypted_content
+                summary = event.item.summary
                yield {
-                    "native": ResponseReasoningItem(
-                        type="reasoning",
+                    "native": LLMResponseReasoningOutputItem(
+                        type=event.item.type,
                        id=event.item.id,
                        summary=[],
                        encrypted_content=encrypted_content,
@@ -296,14 +298,8 @@ async def _transform_stream( # noqa: C901 - This is complex, but better to have
                }
                last_summary_index = len(summary) - 1 if summary else None
-            elif event_item_type == ResponseItemType.WEB_SEARCH_CALL:
-                action = getattr(event.item, "action", None)
-                if isinstance(action, dict):
-                    action_dict = action
-                elif action is not None:
-                    action_dict = action.to_dict()
-                else:
-                    action_dict = {}
+            elif isinstance(event.item, LLMResponseWebSearchCallOutputItem):
+                action_dict = event.item.action
                yield {
                    "tool_calls": [
                        llm.ToolInput(
@@ -321,11 +317,11 @@ async def _transform_stream( # noqa: C901 - This is complex, but better to have
                    "tool_result": {"status": event.item.status},
                }
                last_role = "tool_result"
-            elif event_item_type == ResponseItemType.IMAGE:
-                yield {"native": event.item}
+            elif isinstance(event.item, LLMResponseImageOutputItem):
+                yield {"native": event.item.raw}
                last_summary_index = -1 # Trigger new assistant message on next turn
-        elif event_type == ResponsesAPIStreamEvents.OUTPUT_TEXT_DELTA:
+        elif isinstance(event, LLMResponseOutputTextDeltaEvent):
            data = event.delta
            if remove_parentheses:
                data = data.removeprefix(")")
@@ -344,7 +340,7 @@ async def _transform_stream( # noqa: C901 - This is complex, but better to have
            if data:
                yield {"content": data}
-        elif event_type == ResponsesAPIStreamEvents.REASONING_SUMMARY_TEXT_DELTA:
+        elif isinstance(event, LLMResponseReasoningSummaryTextDeltaEvent):
            # OpenAI can output several reasoning summaries
            # in a single ResponseReasoningItem. We split them as separate
            # AssistantContent messages. Only last of them will have
@@ -358,14 +354,14 @@ async def _transform_stream( # noqa: C901 - This is complex, but better to have
            last_summary_index = event.summary_index
            yield {"thinking_content": event.delta}
-        elif event_type == ResponsesAPIStreamEvents.FUNCTION_CALL_ARGUMENTS_DELTA:
+        elif isinstance(event, LLMResponseFunctionCallArgumentsDeltaEvent):
            if current_tool_call is not None:
                current_tool_call.arguments += event.delta
-        elif event_type == ResponsesAPIStreamEvents.WEB_SEARCH_CALL_SEARCHING:
+        elif isinstance(event, LLMResponseWebSearchCallSearchingEvent):
            yield {"role": "assistant"}
-        elif event_type == ResponsesAPIStreamEvents.FUNCTION_CALL_ARGUMENTS_DONE:
+        elif isinstance(event, LLMResponseFunctionCallArgumentsDoneEvent):
            if current_tool_call is not None:
                current_tool_call.status = "completed"
@@ -385,35 +381,36 @@ async def _transform_stream( # noqa: C901 - This is complex, but better to have
                    ]
                }
-        elif event_type == ResponsesAPIStreamEvents.RESPONSE_COMPLETED:
-            if event.response.usage is not None:
+        elif isinstance(event, LLMResponseCompletedEvent):
+            response = event.response
+            if response and "usage" in response:
+                usage = response["usage"]
                chat_log.async_trace(
                    {
                        "stats": {
-                            "input_tokens": event.response.usage.input_tokens,
-                            "output_tokens": event.response.usage.output_tokens,
+                            "input_tokens": usage.get("input_tokens"),
+                            "output_tokens": usage.get("output_tokens"),
                        }
                    }
                )
-        elif event_type == ResponsesAPIStreamEvents.RESPONSE_INCOMPLETE:
-            if event.response.usage is not None:
+        elif isinstance(event, LLMResponseIncompleteEvent):
+            response = event.response
+            if response and "usage" in response:
+                usage = response["usage"]
                chat_log.async_trace(
                    {
                        "stats": {
-                            "input_tokens": event.response.usage.input_tokens,
-                            "output_tokens": event.response.usage.output_tokens,
+                            "input_tokens": usage.get("input_tokens"),
+                            "output_tokens": usage.get("output_tokens"),
                        }
                    }
                )

-            if (
-                event.response.incomplete_details
-                and event.response.incomplete_details.reason
-            ):
-                reason: str = event.response.incomplete_details.reason
-            else:
-                reason = "unknown reason"
+            incomplete_details = response.get("incomplete_details")
+            reason = "unknown reason"
+            if incomplete_details is not None and incomplete_details.get("reason"):
+                reason = incomplete_details["reason"]

            if reason == "max_output_tokens":
                reason = "max output tokens reached"
@@ -422,22 +419,24 @@ async def _transform_stream( # noqa: C901 - This is complex, but better to have
            raise HomeAssistantError(f"OpenAI response incomplete: {reason}")
-        elif event_type == ResponsesAPIStreamEvents.RESPONSE_FAILED:
-            if event.response.usage is not None:
+        elif isinstance(event, LLMResponseFailedEvent):
+            response = event.response
+            if response and "usage" in response:
+                usage = response["usage"]
                chat_log.async_trace(
                    {
                        "stats": {
-                            "input_tokens": event.response.usage.input_tokens,
-                            "output_tokens": event.response.usage.output_tokens,
+                            "input_tokens": usage.get("input_tokens"),
+                            "output_tokens": usage.get("output_tokens"),
                        }
                    }
                )
            reason = "unknown reason"
-            if event.response.error is not None:
-                reason = event.response.error.message
+            if isinstance(error := response.get("error"), dict):
+                reason = error.get("message") or reason
            raise HomeAssistantError(f"OpenAI response failed: {reason}")
-        elif event_type == ResponsesAPIStreamEvents.ERROR:
+        elif isinstance(event, LLMResponseErrorEvent):
            raise HomeAssistantError(f"OpenAI response error: {event.message}")
@@ -452,7 +451,7 @@ class BaseCloudLLMEntity(Entity):
    async def _prepare_chat_for_generation(
        self,
        chat_log: conversation.ChatLog,
-        messages: ResponseInputParam,
+        messages: list[ResponseInputItemParam],
        response_format: dict[str, Any] | None = None,
    ) -> dict[str, Any]:
        """Prepare kwargs for Cloud LLM from the chat log."""


@@ -13,6 +13,6 @@
  "integration_type": "system",
  "iot_class": "cloud_push",
  "loggers": ["acme", "hass_nabucasa", "snitun"],
-  "requirements": ["hass-nabucasa==1.11.0"],
+  "requirements": ["hass-nabucasa==1.12.0", "openai==2.15.0"],
  "single_config_entry": true
}


@@ -36,7 +36,7 @@ fnv-hash-fast==1.6.0
go2rtc-client==0.4.0
ha-ffmpeg==3.2.2
habluetooth==5.8.0
-hass-nabucasa==1.11.0
+hass-nabucasa==1.12.0
hassil==3.5.0
home-assistant-bluetooth==1.13.1
home-assistant-frontend==20260128.1
@@ -46,6 +46,7 @@ ifaddr==0.2.0
Jinja2==3.1.6
lru-dict==1.3.0
mutagen==1.47.0
+openai==2.15.0
orjson==3.11.5
packaging>=23.1
paho-mqtt==2.1.0


@@ -48,7 +48,7 @@ dependencies = [
    "fnv-hash-fast==1.6.0",
    # hass-nabucasa is imported by helpers which don't depend on the cloud
    # integration
-    "hass-nabucasa==1.11.0",
+    "hass-nabucasa==1.12.0",
    # When bumping httpx, please check the version pins of
    # httpcore, anyio, and h11 in gen_requirements_all
    "httpx==0.28.1",

requirements.txt generated

@@ -24,7 +24,7 @@ cronsim==2.7
cryptography==46.0.2
fnv-hash-fast==1.6.0
ha-ffmpeg==3.2.2
-hass-nabucasa==1.11.0
+hass-nabucasa==1.12.0
hassil==3.5.0
home-assistant-bluetooth==1.13.1
home-assistant-intents==2026.1.6

requirements_all.txt generated

@@ -1175,7 +1175,7 @@ habluetooth==5.8.0
hanna-cloud==0.0.7

# homeassistant.components.cloud
-hass-nabucasa==1.11.0
+hass-nabucasa==1.12.0

# homeassistant.components.splunk
hass-splunk==0.1.1
@@ -1664,6 +1664,7 @@ open-garage==0.2.0
# homeassistant.components.open_meteo
open-meteo==0.3.2

+# homeassistant.components.cloud
# homeassistant.components.open_router
# homeassistant.components.openai_conversation
openai==2.15.0


@@ -1045,7 +1045,7 @@ habluetooth==5.8.0
hanna-cloud==0.0.7

# homeassistant.components.cloud
-hass-nabucasa==1.11.0
+hass-nabucasa==1.12.0

# homeassistant.components.assist_satellite
# homeassistant.components.conversation
@@ -1447,6 +1447,7 @@ open-garage==0.2.0
# homeassistant.components.open_meteo
open-meteo==0.3.2

+# homeassistant.components.cloud
# homeassistant.components.open_router
# homeassistant.components.openai_conversation
openai==2.15.0