diff --git a/homeassistant/components/cloud/entity.py b/homeassistant/components/cloud/entity.py
index c4ab7ba358d..3f651340e93 100644
--- a/homeassistant/components/cloud/entity.py
+++ b/homeassistant/components/cloud/entity.py
@@ -12,14 +12,25 @@ from hass_nabucasa import Cloud, NabuCasaBaseError
 from hass_nabucasa.llm import (
     LLMAuthenticationError,
     LLMRateLimitError,
+    LLMResponseCompletedEvent,
     LLMResponseError,
+    LLMResponseErrorEvent,
+    LLMResponseFailedEvent,
+    LLMResponseFunctionCallArgumentsDeltaEvent,
+    LLMResponseFunctionCallArgumentsDoneEvent,
+    LLMResponseFunctionCallOutputItem,
+    LLMResponseImageOutputItem,
+    LLMResponseIncompleteEvent,
+    LLMResponseMessageOutputItem,
+    LLMResponseOutputItemAddedEvent,
+    LLMResponseOutputItemDoneEvent,
+    LLMResponseOutputTextDeltaEvent,
+    LLMResponseReasoningOutputItem,
+    LLMResponseReasoningSummaryTextDeltaEvent,
+    LLMResponseWebSearchCallOutputItem,
+    LLMResponseWebSearchCallSearchingEvent,
     LLMServiceError,
 )
-from litellm import (
-    ResponseFunctionToolCall,
-    ResponseInputParam,
-    ResponsesAPIStreamEvents,
-)
 from openai.types.responses import (
     FunctionToolParam,
     ResponseInputItemParam,
@@ -60,9 +71,9 @@ class ResponseItemType(str, Enum):
 
 def _convert_content_to_param(
     chat_content: Iterable[conversation.Content],
-) -> ResponseInputParam:
+) -> list[ResponseInputItemParam]:
     """Convert any native chat message for this agent to the native format."""
-    messages: ResponseInputParam = []
+    messages: list[ResponseInputItemParam] = []
     reasoning_summary: list[str] = []
     web_search_calls: dict[str, dict[str, Any]] = {}
 
@@ -238,7 +249,7 @@ async def _transform_stream(  # noqa: C901 - This is complex, but better to have
     """Transform stream result into HA format."""
     last_summary_index = None
     last_role: Literal["assistant", "tool_result"] | None = None
-    current_tool_call: ResponseFunctionToolCall | None = None
+    current_tool_call: LLMResponseFunctionCallOutputItem | None = None
 
     # Non-reasoning models don't follow our request to remove citations, so we remove
     # them manually here. They always follow the same pattern: the citation is always
@@ -248,19 +259,10 @@ async def _transform_stream(  # noqa: C901 - This is complex, but better to have
     citation_regexp = re.compile(r"\(\[([^\]]+)\]\((https?:\/\/[^\)]+)\)")
 
     async for event in stream:
-        event_type = getattr(event, "type", None)
-        event_item = getattr(event, "item", None)
-        event_item_type = getattr(event_item, "type", None) if event_item else None
+        _LOGGER.debug("Event[%s]", getattr(event, "type", None))
 
-        _LOGGER.debug(
-            "Event[%s] | item: %s",
-            event_type,
-            event_item_type,
-        )
-
-        if event_type == ResponsesAPIStreamEvents.OUTPUT_ITEM_ADDED:
-            # Detect function_call even when it's a BaseLiteLLMOpenAIResponseObject
-            if event_item_type == ResponseItemType.FUNCTION_CALL:
+        if isinstance(event, LLMResponseOutputItemAddedEvent):
+            if isinstance(event.item, LLMResponseFunctionCallOutputItem):
                 # OpenAI has tool calls as individual events
                 # while HA puts tool calls inside the assistant message.
                 # We turn them into individual assistant content for HA
@@ -268,11 +270,11 @@ async def _transform_stream(  # noqa: C901 - This is complex, but better to have
                     yield {"role": "assistant"}
                     last_role = "assistant"
                     last_summary_index = None
-                current_tool_call = cast(ResponseFunctionToolCall, event.item)
+                current_tool_call = event.item
             elif (
-                event_item_type == ResponseItemType.MESSAGE
+                isinstance(event.item, LLMResponseMessageOutputItem)
                 or (
-                    event_item_type == ResponseItemType.REASONING
+                    isinstance(event.item, LLMResponseReasoningOutputItem)
                     and last_summary_index is not None
                 )  # Subsequent ResponseReasoningItem
                 or last_role != "assistant"
@@ -281,14 +283,14 @@ async def _transform_stream(  # noqa: C901 - This is complex, but better to have
                 last_role = "assistant"
                 last_summary_index = None
 
-        elif event_type == ResponsesAPIStreamEvents.OUTPUT_ITEM_DONE:
-            if event_item_type == ResponseItemType.REASONING:
-                encrypted_content = getattr(event.item, "encrypted_content", None)
-                summary = getattr(event.item, "summary", []) or []
+        elif isinstance(event, LLMResponseOutputItemDoneEvent):
+            if isinstance(event.item, LLMResponseReasoningOutputItem):
+                encrypted_content = event.item.encrypted_content
+                summary = event.item.summary
                 yield {
-                    "native": ResponseReasoningItem(
-                        type="reasoning",
+                    "native": LLMResponseReasoningOutputItem(
+                        type=event.item.type,
                         id=event.item.id,
                         summary=[],
                         encrypted_content=encrypted_content,
@@ -296,14 +298,8 @@ async def _transform_stream(  # noqa: C901 - This is complex, but better to have
                 }
                 last_summary_index = len(summary) - 1 if summary else None
 
-            elif event_item_type == ResponseItemType.WEB_SEARCH_CALL:
-                action = getattr(event.item, "action", None)
-                if isinstance(action, dict):
-                    action_dict = action
-                elif action is not None:
-                    action_dict = action.to_dict()
-                else:
-                    action_dict = {}
+            elif isinstance(event.item, LLMResponseWebSearchCallOutputItem):
+                action_dict = event.item.action
                 yield {
                     "tool_calls": [
                         llm.ToolInput(
@@ -321,11 +317,11 @@ async def _transform_stream(  # noqa: C901 - This is complex, but better to have
                     "tool_result": {"status": event.item.status},
                 }
                 last_role = "tool_result"
-            elif event_item_type == ResponseItemType.IMAGE:
-                yield {"native": event.item}
+            elif isinstance(event.item, LLMResponseImageOutputItem):
+                yield {"native": event.item.raw}
                 last_summary_index = -1  # Trigger new assistant message on next turn
 
-        elif event_type == ResponsesAPIStreamEvents.OUTPUT_TEXT_DELTA:
+        elif isinstance(event, LLMResponseOutputTextDeltaEvent):
             data = event.delta
             if remove_parentheses:
                 data = data.removeprefix(")")
@@ -344,7 +340,7 @@ async def _transform_stream(  # noqa: C901 - This is complex, but better to have
             if data:
                 yield {"content": data}
 
-        elif event_type == ResponsesAPIStreamEvents.REASONING_SUMMARY_TEXT_DELTA:
+        elif isinstance(event, LLMResponseReasoningSummaryTextDeltaEvent):
             # OpenAI can output several reasoning summaries
             # in a single ResponseReasoningItem. We split them as separate
             # AssistantContent messages. Only last of them will have
@@ -358,14 +354,14 @@ async def _transform_stream(  # noqa: C901 - This is complex, but better to have
                 last_summary_index = event.summary_index
             yield {"thinking_content": event.delta}
 
-        elif event_type == ResponsesAPIStreamEvents.FUNCTION_CALL_ARGUMENTS_DELTA:
+        elif isinstance(event, LLMResponseFunctionCallArgumentsDeltaEvent):
             if current_tool_call is not None:
                 current_tool_call.arguments += event.delta
 
-        elif event_type == ResponsesAPIStreamEvents.WEB_SEARCH_CALL_SEARCHING:
+        elif isinstance(event, LLMResponseWebSearchCallSearchingEvent):
             yield {"role": "assistant"}
 
-        elif event_type == ResponsesAPIStreamEvents.FUNCTION_CALL_ARGUMENTS_DONE:
+        elif isinstance(event, LLMResponseFunctionCallArgumentsDoneEvent):
             if current_tool_call is not None:
                 current_tool_call.status = "completed"
@@ -385,35 +381,36 @@ async def _transform_stream(  # noqa: C901 - This is complex, but better to have
                 ]
             }
 
-        elif event_type == ResponsesAPIStreamEvents.RESPONSE_COMPLETED:
-            if event.response.usage is not None:
+        elif isinstance(event, LLMResponseCompletedEvent):
+            response = event.response
+            if response and "usage" in response:
+                usage = response["usage"]
                 chat_log.async_trace(
                     {
                         "stats": {
-                            "input_tokens": event.response.usage.input_tokens,
-                            "output_tokens": event.response.usage.output_tokens,
+                            "input_tokens": usage.get("input_tokens"),
+                            "output_tokens": usage.get("output_tokens"),
                         }
                     }
                 )
 
-        elif event_type == ResponsesAPIStreamEvents.RESPONSE_INCOMPLETE:
-            if event.response.usage is not None:
+        elif isinstance(event, LLMResponseIncompleteEvent):
+            response = event.response
+            if response and "usage" in response:
+                usage = response["usage"]
                 chat_log.async_trace(
                     {
                         "stats": {
-                            "input_tokens": event.response.usage.input_tokens,
-                            "output_tokens": event.response.usage.output_tokens,
+                            "input_tokens": usage.get("input_tokens"),
+                            "output_tokens": usage.get("output_tokens"),
                         }
                     }
                 )
-            if (
-                event.response.incomplete_details
-                and event.response.incomplete_details.reason
-            ):
-                reason: str = event.response.incomplete_details.reason
-            else:
-                reason = "unknown reason"
+            incomplete_details = response.get("incomplete_details")
+            reason = "unknown reason"
+            if incomplete_details is not None and incomplete_details.get("reason"):
+                reason = incomplete_details["reason"]
 
             if reason == "max_output_tokens":
                 reason = "max output tokens reached"
@@ -422,22 +419,24 @@ async def _transform_stream(  # noqa: C901 - This is complex, but better to have
 
             raise HomeAssistantError(f"OpenAI response incomplete: {reason}")
 
-        elif event_type == ResponsesAPIStreamEvents.RESPONSE_FAILED:
-            if event.response.usage is not None:
+        elif isinstance(event, LLMResponseFailedEvent):
+            response = event.response
+            if response and "usage" in response:
+                usage = response["usage"]
                 chat_log.async_trace(
                     {
                         "stats": {
-                            "input_tokens": event.response.usage.input_tokens,
-                            "output_tokens": event.response.usage.output_tokens,
+                            "input_tokens": usage.get("input_tokens"),
+                            "output_tokens": usage.get("output_tokens"),
                         }
                     }
                 )
             reason = "unknown reason"
-            if event.response.error is not None:
-                reason = event.response.error.message
+            if isinstance(error := response.get("error"), dict):
+                reason = error.get("message") or reason
 
             raise HomeAssistantError(f"OpenAI response failed: {reason}")
 
-        elif event_type == ResponsesAPIStreamEvents.ERROR:
+        elif isinstance(event, LLMResponseErrorEvent):
             raise HomeAssistantError(f"OpenAI response error: {event.message}")
@@ -452,7 +451,7 @@ class BaseCloudLLMEntity(Entity):
     async def _prepare_chat_for_generation(
         self,
         chat_log: conversation.ChatLog,
-        messages: ResponseInputParam,
+        messages: list[ResponseInputItemParam],
         response_format: dict[str, Any] | None = None,
     ) -> dict[str, Any]:
         """Prepare kwargs for Cloud LLM from the chat log."""
diff --git a/homeassistant/components/cloud/manifest.json b/homeassistant/components/cloud/manifest.json
index 1b8766b83df..8c01a326797 100644
--- a/homeassistant/components/cloud/manifest.json
+++ b/homeassistant/components/cloud/manifest.json
@@ -13,6 +13,6 @@
   "integration_type": "system",
   "iot_class": "cloud_push",
   "loggers": ["acme", "hass_nabucasa", "snitun"],
-  "requirements": ["hass-nabucasa==1.11.0"],
+  "requirements": ["hass-nabucasa==1.12.0", "openai==2.15.0"],
   "single_config_entry": true
 }
diff --git a/homeassistant/package_constraints.txt b/homeassistant/package_constraints.txt
index dee8f0661c7..1aab701d2f2 100644
--- a/homeassistant/package_constraints.txt
+++ b/homeassistant/package_constraints.txt
@@ -36,7 +36,7 @@ fnv-hash-fast==1.6.0
 go2rtc-client==0.4.0
 ha-ffmpeg==3.2.2
 habluetooth==5.8.0
-hass-nabucasa==1.11.0
+hass-nabucasa==1.12.0
 hassil==3.5.0
 home-assistant-bluetooth==1.13.1
 home-assistant-frontend==20260128.1
@@ -46,6 +46,7 @@ ifaddr==0.2.0
 Jinja2==3.1.6
 lru-dict==1.3.0
 mutagen==1.47.0
+openai==2.15.0
 orjson==3.11.5
 packaging>=23.1
 paho-mqtt==2.1.0
diff --git a/pyproject.toml b/pyproject.toml
index e326e3a6c8b..108921e7bdc 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -48,7 +48,7 @@ dependencies = [
     "fnv-hash-fast==1.6.0",
     # hass-nabucasa is imported by helpers which don't depend on the cloud
     # integration
-    "hass-nabucasa==1.11.0",
+    "hass-nabucasa==1.12.0",
     # When bumping httpx, please check the version pins of
     # httpcore, anyio, and h11 in gen_requirements_all
     "httpx==0.28.1",
diff --git a/requirements.txt b/requirements.txt
index c03d6674545..3a2f519db57 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -24,7 +24,7 @@ cronsim==2.7
 cryptography==46.0.2
 fnv-hash-fast==1.6.0
 ha-ffmpeg==3.2.2
-hass-nabucasa==1.11.0
+hass-nabucasa==1.12.0
 hassil==3.5.0
 home-assistant-bluetooth==1.13.1
 home-assistant-intents==2026.1.6
diff --git a/requirements_all.txt b/requirements_all.txt
index 0c037d4c713..84a04fc7d9b 100644
--- a/requirements_all.txt
+++ b/requirements_all.txt
@@ -1175,7 +1175,7 @@ habluetooth==5.8.0
 hanna-cloud==0.0.7
 
 # homeassistant.components.cloud
-hass-nabucasa==1.11.0
+hass-nabucasa==1.12.0
 
 # homeassistant.components.splunk
 hass-splunk==0.1.1
@@ -1664,6 +1664,7 @@ open-garage==0.2.0
 # homeassistant.components.open_meteo
 open-meteo==0.3.2
 
+# homeassistant.components.cloud
 # homeassistant.components.open_router
 # homeassistant.components.openai_conversation
 openai==2.15.0
diff --git a/requirements_test_all.txt b/requirements_test_all.txt
index 6a4a0d34e34..9361e03bd43 100644
--- a/requirements_test_all.txt
+++ b/requirements_test_all.txt
@@ -1045,7 +1045,7 @@ habluetooth==5.8.0
 hanna-cloud==0.0.7
 
 # homeassistant.components.cloud
-hass-nabucasa==1.11.0
+hass-nabucasa==1.12.0
 
 # homeassistant.components.assist_satellite
 # homeassistant.components.conversation
@@ -1447,6 +1447,7 @@ open-garage==0.2.0
 # homeassistant.components.open_meteo
 open-meteo==0.3.2
 
+# homeassistant.components.cloud
 # homeassistant.components.open_router
 # homeassistant.components.openai_conversation
 openai==2.15.0
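
The core change in `_transform_stream` swaps litellm's string-valued `ResponsesAPIStreamEvents` comparisons and `getattr` probing for `isinstance` checks against the typed event classes now exported by `hass_nabucasa.llm`. Below is a minimal, self-contained sketch of that dispatch pattern; the event classes are local stand-ins, not the real `hass_nabucasa.llm` types (their constructors are not shown in this diff), and only the shape of the dispatch mirrors the patched code.

```python
# Sketch of typed-event dispatch, assuming stand-in event classes.
from __future__ import annotations

import asyncio
from collections.abc import AsyncIterator
from dataclasses import dataclass


@dataclass
class OutputTextDeltaEvent:
    """Stand-in for LLMResponseOutputTextDeltaEvent (hypothetical)."""

    delta: str


@dataclass
class ResponseCompletedEvent:
    """Stand-in for LLMResponseCompletedEvent (hypothetical)."""

    response: dict


async def transform(stream: AsyncIterator[object]) -> AsyncIterator[dict]:
    """Dispatch on concrete event classes instead of comparing type strings."""
    async for event in stream:
        if isinstance(event, OutputTextDeltaEvent):
            # isinstance narrows the type, so .delta is statically checked.
            yield {"content": event.delta}
        elif isinstance(event, ResponseCompletedEvent):
            # Usage arrives as a plain dict, read defensively with .get().
            usage = event.response.get("usage", {})
            yield {"stats": {"input_tokens": usage.get("input_tokens")}}


async def main() -> None:
    async def fake_stream() -> AsyncIterator[object]:
        yield OutputTextDeltaEvent(delta="Hello")
        yield ResponseCompletedEvent(response={"usage": {"input_tokens": 3}})

    async for item in transform(fake_stream()):
        print(item)


asyncio.run(main())
```

Because each branch narrows `event` to a concrete class, attribute access such as `event.delta` or `event.item` is checked by the type checker, which is what lets the diff drop the `getattr` inspection and the `cast(ResponseFunctionToolCall, event.item)` call.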