
Use OpenAI schema dataclasses for cloud stream responses (#161663)

victorigualada committed via GitHub on 2026-01-28 20:59:03 +01:00
commit bdbce57217, parent 8536472fe9
7 changed files with 75 additions and 73 deletions
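The commit replaces litellm's string-based `ResponsesAPIStreamEvents` comparisons with `isinstance` checks against the typed event dataclasses that `hass_nabucasa.llm` exports. The following is a minimal sketch of that dispatch style, not the integration's actual handler; it only uses names and attributes that appear in the diff below, and `stream` is assumed to be any async iterator of these events:

```python
from hass_nabucasa.llm import (
    LLMResponseCompletedEvent,
    LLMResponseOutputItemAddedEvent,
    LLMResponseOutputTextDeltaEvent,
)


async def dump_stream(stream) -> None:
    """Illustrative only: dispatch on typed event dataclasses instead of type strings."""
    async for event in stream:
        if isinstance(event, LLMResponseOutputItemAddedEvent):
            # A new output item (message, reasoning, tool call, ...) has started.
            print("new item:", type(event.item).__name__)
        elif isinstance(event, LLMResponseOutputTextDeltaEvent):
            # Incremental assistant text arrives as deltas.
            print(event.delta, end="")
        elif isinstance(event, LLMResponseCompletedEvent):
            # The response payload is dict-like; usage is optional.
            usage = (event.response or {}).get("usage", {})
            print("\ntokens:", usage.get("input_tokens"), usage.get("output_tokens"))
```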


@@ -12,14 +12,25 @@ from hass_nabucasa import Cloud, NabuCasaBaseError
from hass_nabucasa.llm import (
    LLMAuthenticationError,
    LLMRateLimitError,
+    LLMResponseCompletedEvent,
    LLMResponseError,
+    LLMResponseErrorEvent,
+    LLMResponseFailedEvent,
+    LLMResponseFunctionCallArgumentsDeltaEvent,
+    LLMResponseFunctionCallArgumentsDoneEvent,
+    LLMResponseFunctionCallOutputItem,
+    LLMResponseImageOutputItem,
+    LLMResponseIncompleteEvent,
+    LLMResponseMessageOutputItem,
+    LLMResponseOutputItemAddedEvent,
+    LLMResponseOutputItemDoneEvent,
+    LLMResponseOutputTextDeltaEvent,
+    LLMResponseReasoningOutputItem,
+    LLMResponseReasoningSummaryTextDeltaEvent,
+    LLMResponseWebSearchCallOutputItem,
+    LLMResponseWebSearchCallSearchingEvent,
    LLMServiceError,
)
-from litellm import (
-    ResponseFunctionToolCall,
-    ResponseInputParam,
-    ResponsesAPIStreamEvents,
-)
from openai.types.responses import (
    FunctionToolParam,
    ResponseInputItemParam,
@@ -60,9 +71,9 @@ class ResponseItemType(str, Enum):
def _convert_content_to_param(
    chat_content: Iterable[conversation.Content],
-) -> ResponseInputParam:
+) -> list[ResponseInputItemParam]:
    """Convert any native chat message for this agent to the native format."""
-    messages: ResponseInputParam = []
+    messages: list[ResponseInputItemParam] = []
    reasoning_summary: list[str] = []
    web_search_calls: dict[str, dict[str, Any]] = {}
@@ -238,7 +249,7 @@ async def _transform_stream( # noqa: C901 - This is complex, but better to have
    """Transform stream result into HA format."""
    last_summary_index = None
    last_role: Literal["assistant", "tool_result"] | None = None
-    current_tool_call: ResponseFunctionToolCall | None = None
+    current_tool_call: LLMResponseFunctionCallOutputItem | None = None

    # Non-reasoning models don't follow our request to remove citations, so we remove
    # them manually here. They always follow the same pattern: the citation is always
@@ -248,19 +259,10 @@ async def _transform_stream( # noqa: C901 - This is complex, but better to have
    citation_regexp = re.compile(r"\(\[([^\]]+)\]\((https?:\/\/[^\)]+)\)")

    async for event in stream:
-        event_type = getattr(event, "type", None)
-        event_item = getattr(event, "item", None)
-        event_item_type = getattr(event_item, "type", None) if event_item else None
+        _LOGGER.debug("Event[%s]", getattr(event, "type", None))

-        _LOGGER.debug(
-            "Event[%s] | item: %s",
-            event_type,
-            event_item_type,
-        )
-
-        if event_type == ResponsesAPIStreamEvents.OUTPUT_ITEM_ADDED:
-            # Detect function_call even when it's a BaseLiteLLMOpenAIResponseObject
-            if event_item_type == ResponseItemType.FUNCTION_CALL:
+        if isinstance(event, LLMResponseOutputItemAddedEvent):
+            if isinstance(event.item, LLMResponseFunctionCallOutputItem):
                # OpenAI has tool calls as individual events
                # while HA puts tool calls inside the assistant message.
                # We turn them into individual assistant content for HA
@@ -268,11 +270,11 @@ async def _transform_stream( # noqa: C901 - This is complex, but better to have
                    yield {"role": "assistant"}
                    last_role = "assistant"
                last_summary_index = None
-                current_tool_call = cast(ResponseFunctionToolCall, event.item)
+                current_tool_call = event.item
            elif (
-                event_item_type == ResponseItemType.MESSAGE
+                isinstance(event.item, LLMResponseMessageOutputItem)
                or (
-                    event_item_type == ResponseItemType.REASONING
+                    isinstance(event.item, LLMResponseReasoningOutputItem)
                    and last_summary_index is not None
                ) # Subsequent ResponseReasoningItem
                or last_role != "assistant"
@@ -281,14 +283,14 @@ async def _transform_stream( # noqa: C901 - This is complex, but better to have
                last_role = "assistant"
                last_summary_index = None
-        elif event_type == ResponsesAPIStreamEvents.OUTPUT_ITEM_DONE:
-            if event_item_type == ResponseItemType.REASONING:
-                encrypted_content = getattr(event.item, "encrypted_content", None)
-                summary = getattr(event.item, "summary", []) or []
+        elif isinstance(event, LLMResponseOutputItemDoneEvent):
+            if isinstance(event.item, LLMResponseReasoningOutputItem):
+                encrypted_content = event.item.encrypted_content
+                summary = event.item.summary
                yield {
-                    "native": ResponseReasoningItem(
-                        type="reasoning",
+                    "native": LLMResponseReasoningOutputItem(
+                        type=event.item.type,
                        id=event.item.id,
                        summary=[],
                        encrypted_content=encrypted_content,
@@ -296,14 +298,8 @@ async def _transform_stream( # noqa: C901 - This is complex, but better to have
                }
                last_summary_index = len(summary) - 1 if summary else None
-            elif event_item_type == ResponseItemType.WEB_SEARCH_CALL:
-                action = getattr(event.item, "action", None)
-                if isinstance(action, dict):
-                    action_dict = action
-                elif action is not None:
-                    action_dict = action.to_dict()
-                else:
-                    action_dict = {}
+            elif isinstance(event.item, LLMResponseWebSearchCallOutputItem):
+                action_dict = event.item.action
                yield {
                    "tool_calls": [
                        llm.ToolInput(
@@ -321,11 +317,11 @@ async def _transform_stream( # noqa: C901 - This is complex, but better to have
                    "tool_result": {"status": event.item.status},
                }
                last_role = "tool_result"
-            elif event_item_type == ResponseItemType.IMAGE:
-                yield {"native": event.item}
+            elif isinstance(event.item, LLMResponseImageOutputItem):
+                yield {"native": event.item.raw}
                last_summary_index = -1 # Trigger new assistant message on next turn
-        elif event_type == ResponsesAPIStreamEvents.OUTPUT_TEXT_DELTA:
+        elif isinstance(event, LLMResponseOutputTextDeltaEvent):
            data = event.delta
            if remove_parentheses:
                data = data.removeprefix(")")
@@ -344,7 +340,7 @@ async def _transform_stream( # noqa: C901 - This is complex, but better to have
            if data:
                yield {"content": data}
-        elif event_type == ResponsesAPIStreamEvents.REASONING_SUMMARY_TEXT_DELTA:
+        elif isinstance(event, LLMResponseReasoningSummaryTextDeltaEvent):
            # OpenAI can output several reasoning summaries
            # in a single ResponseReasoningItem. We split them as separate
            # AssistantContent messages. Only last of them will have
@@ -358,14 +354,14 @@ async def _transform_stream( # noqa: C901 - This is complex, but better to have
            last_summary_index = event.summary_index
            yield {"thinking_content": event.delta}
-        elif event_type == ResponsesAPIStreamEvents.FUNCTION_CALL_ARGUMENTS_DELTA:
+        elif isinstance(event, LLMResponseFunctionCallArgumentsDeltaEvent):
            if current_tool_call is not None:
                current_tool_call.arguments += event.delta
-        elif event_type == ResponsesAPIStreamEvents.WEB_SEARCH_CALL_SEARCHING:
+        elif isinstance(event, LLMResponseWebSearchCallSearchingEvent):
            yield {"role": "assistant"}
-        elif event_type == ResponsesAPIStreamEvents.FUNCTION_CALL_ARGUMENTS_DONE:
+        elif isinstance(event, LLMResponseFunctionCallArgumentsDoneEvent):
            if current_tool_call is not None:
                current_tool_call.status = "completed"
@@ -385,35 +381,36 @@ async def _transform_stream( # noqa: C901 - This is complex, but better to have
                    ]
                }
-        elif event_type == ResponsesAPIStreamEvents.RESPONSE_COMPLETED:
-            if event.response.usage is not None:
+        elif isinstance(event, LLMResponseCompletedEvent):
+            response = event.response
+            if response and "usage" in response:
+                usage = response["usage"]
                chat_log.async_trace(
                    {
                        "stats": {
-                            "input_tokens": event.response.usage.input_tokens,
-                            "output_tokens": event.response.usage.output_tokens,
+                            "input_tokens": usage.get("input_tokens"),
+                            "output_tokens": usage.get("output_tokens"),
                        }
                    }
                )
-        elif event_type == ResponsesAPIStreamEvents.RESPONSE_INCOMPLETE:
-            if event.response.usage is not None:
+        elif isinstance(event, LLMResponseIncompleteEvent):
+            response = event.response
+            if response and "usage" in response:
+                usage = response["usage"]
                chat_log.async_trace(
                    {
                        "stats": {
-                            "input_tokens": event.response.usage.input_tokens,
-                            "output_tokens": event.response.usage.output_tokens,
+                            "input_tokens": usage.get("input_tokens"),
+                            "output_tokens": usage.get("output_tokens"),
                        }
                    }
                )

-            if (
-                event.response.incomplete_details
-                and event.response.incomplete_details.reason
-            ):
-                reason: str = event.response.incomplete_details.reason
-            else:
-                reason = "unknown reason"
+            incomplete_details = response.get("incomplete_details")
+            reason = "unknown reason"
+            if incomplete_details is not None and incomplete_details.get("reason"):
+                reason = incomplete_details["reason"]

            if reason == "max_output_tokens":
                reason = "max output tokens reached"
@@ -422,22 +419,24 @@ async def _transform_stream( # noqa: C901 - This is complex, but better to have
            raise HomeAssistantError(f"OpenAI response incomplete: {reason}")
-        elif event_type == ResponsesAPIStreamEvents.RESPONSE_FAILED:
-            if event.response.usage is not None:
+        elif isinstance(event, LLMResponseFailedEvent):
+            response = event.response
+            if response and "usage" in response:
+                usage = response["usage"]
                chat_log.async_trace(
                    {
                        "stats": {
-                            "input_tokens": event.response.usage.input_tokens,
-                            "output_tokens": event.response.usage.output_tokens,
+                            "input_tokens": usage.get("input_tokens"),
+                            "output_tokens": usage.get("output_tokens"),
                        }
                    }
                )
            reason = "unknown reason"
-            if event.response.error is not None:
-                reason = event.response.error.message
+            if isinstance(error := response.get("error"), dict):
+                reason = error.get("message") or reason
            raise HomeAssistantError(f"OpenAI response failed: {reason}")
-        elif event_type == ResponsesAPIStreamEvents.ERROR:
+        elif isinstance(event, LLMResponseErrorEvent):
            raise HomeAssistantError(f"OpenAI response error: {event.message}")
@@ -452,7 +451,7 @@ class BaseCloudLLMEntity(Entity):
    async def _prepare_chat_for_generation(
        self,
        chat_log: conversation.ChatLog,
-        messages: ResponseInputParam,
+        messages: list[ResponseInputItemParam],
        response_format: dict[str, Any] | None = None,
    ) -> dict[str, Any]:
        """Prepare kwargs for Cloud LLM from the chat log."""


@@ -13,6 +13,6 @@
  "integration_type": "system",
  "iot_class": "cloud_push",
  "loggers": ["acme", "hass_nabucasa", "snitun"],
-  "requirements": ["hass-nabucasa==1.11.0"],
+  "requirements": ["hass-nabucasa==1.12.0", "openai==2.15.0"],
  "single_config_entry": true
}


@@ -36,7 +36,7 @@ fnv-hash-fast==1.6.0
go2rtc-client==0.4.0
ha-ffmpeg==3.2.2
habluetooth==5.8.0
-hass-nabucasa==1.11.0
+hass-nabucasa==1.12.0
hassil==3.5.0
home-assistant-bluetooth==1.13.1
home-assistant-frontend==20260128.1
@@ -46,6 +46,7 @@ ifaddr==0.2.0
Jinja2==3.1.6
lru-dict==1.3.0
mutagen==1.47.0
+openai==2.15.0
orjson==3.11.5
packaging>=23.1
paho-mqtt==2.1.0


@@ -48,7 +48,7 @@ dependencies = [
    "fnv-hash-fast==1.6.0",
    # hass-nabucasa is imported by helpers which don't depend on the cloud
    # integration
-    "hass-nabucasa==1.11.0",
+    "hass-nabucasa==1.12.0",
    # When bumping httpx, please check the version pins of
    # httpcore, anyio, and h11 in gen_requirements_all
    "httpx==0.28.1",

requirements.txt generated

@@ -24,7 +24,7 @@ cronsim==2.7
cryptography==46.0.2
fnv-hash-fast==1.6.0
ha-ffmpeg==3.2.2
-hass-nabucasa==1.11.0
+hass-nabucasa==1.12.0
hassil==3.5.0
home-assistant-bluetooth==1.13.1
home-assistant-intents==2026.1.6

requirements_all.txt generated

@@ -1175,7 +1175,7 @@ habluetooth==5.8.0
hanna-cloud==0.0.7

# homeassistant.components.cloud
-hass-nabucasa==1.11.0
+hass-nabucasa==1.12.0

# homeassistant.components.splunk
hass-splunk==0.1.1
@@ -1664,6 +1664,7 @@ open-garage==0.2.0
# homeassistant.components.open_meteo
open-meteo==0.3.2

+# homeassistant.components.cloud
# homeassistant.components.open_router
# homeassistant.components.openai_conversation
openai==2.15.0


@@ -1045,7 +1045,7 @@ habluetooth==5.8.0
hanna-cloud==0.0.7

# homeassistant.components.cloud
-hass-nabucasa==1.11.0
+hass-nabucasa==1.12.0

# homeassistant.components.assist_satellite
# homeassistant.components.conversation
@@ -1447,6 +1447,7 @@ open-garage==0.2.0
# homeassistant.components.open_meteo
open-meteo==0.3.2

+# homeassistant.components.cloud
# homeassistant.components.open_router
# homeassistant.components.openai_conversation
openai==2.15.0