mirror of
https://github.com/home-assistant/core.git
synced 2026-02-15 07:36:16 +00:00
Use OpenAI schema dataclasses for cloud stream responses (#161663)
This commit is contained in:
@@ -12,14 +12,25 @@ from hass_nabucasa import Cloud, NabuCasaBaseError
|
||||
from hass_nabucasa.llm import (
|
||||
LLMAuthenticationError,
|
||||
LLMRateLimitError,
|
||||
LLMResponseCompletedEvent,
|
||||
LLMResponseError,
|
||||
LLMResponseErrorEvent,
|
||||
LLMResponseFailedEvent,
|
||||
LLMResponseFunctionCallArgumentsDeltaEvent,
|
||||
LLMResponseFunctionCallArgumentsDoneEvent,
|
||||
LLMResponseFunctionCallOutputItem,
|
||||
LLMResponseImageOutputItem,
|
||||
LLMResponseIncompleteEvent,
|
||||
LLMResponseMessageOutputItem,
|
||||
LLMResponseOutputItemAddedEvent,
|
||||
LLMResponseOutputItemDoneEvent,
|
||||
LLMResponseOutputTextDeltaEvent,
|
||||
LLMResponseReasoningOutputItem,
|
||||
LLMResponseReasoningSummaryTextDeltaEvent,
|
||||
LLMResponseWebSearchCallOutputItem,
|
||||
LLMResponseWebSearchCallSearchingEvent,
|
||||
LLMServiceError,
|
||||
)
|
||||
from litellm import (
|
||||
ResponseFunctionToolCall,
|
||||
ResponseInputParam,
|
||||
ResponsesAPIStreamEvents,
|
||||
)
|
||||
from openai.types.responses import (
|
||||
FunctionToolParam,
|
||||
ResponseInputItemParam,
|
||||
@@ -60,9 +71,9 @@ class ResponseItemType(str, Enum):
|
||||
|
||||
def _convert_content_to_param(
|
||||
chat_content: Iterable[conversation.Content],
|
||||
) -> ResponseInputParam:
|
||||
) -> list[ResponseInputItemParam]:
|
||||
"""Convert any native chat message for this agent to the native format."""
|
||||
messages: ResponseInputParam = []
|
||||
messages: list[ResponseInputItemParam] = []
|
||||
reasoning_summary: list[str] = []
|
||||
web_search_calls: dict[str, dict[str, Any]] = {}
|
||||
|
||||
@@ -238,7 +249,7 @@ async def _transform_stream( # noqa: C901 - This is complex, but better to have
|
||||
"""Transform stream result into HA format."""
|
||||
last_summary_index = None
|
||||
last_role: Literal["assistant", "tool_result"] | None = None
|
||||
current_tool_call: ResponseFunctionToolCall | None = None
|
||||
current_tool_call: LLMResponseFunctionCallOutputItem | None = None
|
||||
|
||||
# Non-reasoning models don't follow our request to remove citations, so we remove
|
||||
# them manually here. They always follow the same pattern: the citation is always
|
||||
@@ -248,19 +259,10 @@ async def _transform_stream( # noqa: C901 - This is complex, but better to have
|
||||
citation_regexp = re.compile(r"\(\[([^\]]+)\]\((https?:\/\/[^\)]+)\)")
|
||||
|
||||
async for event in stream:
|
||||
event_type = getattr(event, "type", None)
|
||||
event_item = getattr(event, "item", None)
|
||||
event_item_type = getattr(event_item, "type", None) if event_item else None
|
||||
_LOGGER.debug("Event[%s]", getattr(event, "type", None))
|
||||
|
||||
_LOGGER.debug(
|
||||
"Event[%s] | item: %s",
|
||||
event_type,
|
||||
event_item_type,
|
||||
)
|
||||
|
||||
if event_type == ResponsesAPIStreamEvents.OUTPUT_ITEM_ADDED:
|
||||
# Detect function_call even when it's a BaseLiteLLMOpenAIResponseObject
|
||||
if event_item_type == ResponseItemType.FUNCTION_CALL:
|
||||
if isinstance(event, LLMResponseOutputItemAddedEvent):
|
||||
if isinstance(event.item, LLMResponseFunctionCallOutputItem):
|
||||
# OpenAI has tool calls as individual events
|
||||
# while HA puts tool calls inside the assistant message.
|
||||
# We turn them into individual assistant content for HA
|
||||
@@ -268,11 +270,11 @@ async def _transform_stream( # noqa: C901 - This is complex, but better to have
|
||||
yield {"role": "assistant"}
|
||||
last_role = "assistant"
|
||||
last_summary_index = None
|
||||
current_tool_call = cast(ResponseFunctionToolCall, event.item)
|
||||
current_tool_call = event.item
|
||||
elif (
|
||||
event_item_type == ResponseItemType.MESSAGE
|
||||
isinstance(event.item, LLMResponseMessageOutputItem)
|
||||
or (
|
||||
event_item_type == ResponseItemType.REASONING
|
||||
isinstance(event.item, LLMResponseReasoningOutputItem)
|
||||
and last_summary_index is not None
|
||||
) # Subsequent ResponseReasoningItem
|
||||
or last_role != "assistant"
|
||||
@@ -281,14 +283,14 @@ async def _transform_stream( # noqa: C901 - This is complex, but better to have
|
||||
last_role = "assistant"
|
||||
last_summary_index = None
|
||||
|
||||
elif event_type == ResponsesAPIStreamEvents.OUTPUT_ITEM_DONE:
|
||||
if event_item_type == ResponseItemType.REASONING:
|
||||
encrypted_content = getattr(event.item, "encrypted_content", None)
|
||||
summary = getattr(event.item, "summary", []) or []
|
||||
elif isinstance(event, LLMResponseOutputItemDoneEvent):
|
||||
if isinstance(event.item, LLMResponseReasoningOutputItem):
|
||||
encrypted_content = event.item.encrypted_content
|
||||
summary = event.item.summary
|
||||
|
||||
yield {
|
||||
"native": ResponseReasoningItem(
|
||||
type="reasoning",
|
||||
"native": LLMResponseReasoningOutputItem(
|
||||
type=event.item.type,
|
||||
id=event.item.id,
|
||||
summary=[],
|
||||
encrypted_content=encrypted_content,
|
||||
@@ -296,14 +298,8 @@ async def _transform_stream( # noqa: C901 - This is complex, but better to have
|
||||
}
|
||||
|
||||
last_summary_index = len(summary) - 1 if summary else None
|
||||
elif event_item_type == ResponseItemType.WEB_SEARCH_CALL:
|
||||
action = getattr(event.item, "action", None)
|
||||
if isinstance(action, dict):
|
||||
action_dict = action
|
||||
elif action is not None:
|
||||
action_dict = action.to_dict()
|
||||
else:
|
||||
action_dict = {}
|
||||
elif isinstance(event.item, LLMResponseWebSearchCallOutputItem):
|
||||
action_dict = event.item.action
|
||||
yield {
|
||||
"tool_calls": [
|
||||
llm.ToolInput(
|
||||
@@ -321,11 +317,11 @@ async def _transform_stream( # noqa: C901 - This is complex, but better to have
|
||||
"tool_result": {"status": event.item.status},
|
||||
}
|
||||
last_role = "tool_result"
|
||||
elif event_item_type == ResponseItemType.IMAGE:
|
||||
yield {"native": event.item}
|
||||
elif isinstance(event.item, LLMResponseImageOutputItem):
|
||||
yield {"native": event.item.raw}
|
||||
last_summary_index = -1 # Trigger new assistant message on next turn
|
||||
|
||||
elif event_type == ResponsesAPIStreamEvents.OUTPUT_TEXT_DELTA:
|
||||
elif isinstance(event, LLMResponseOutputTextDeltaEvent):
|
||||
data = event.delta
|
||||
if remove_parentheses:
|
||||
data = data.removeprefix(")")
|
||||
@@ -344,7 +340,7 @@ async def _transform_stream( # noqa: C901 - This is complex, but better to have
|
||||
if data:
|
||||
yield {"content": data}
|
||||
|
||||
elif event_type == ResponsesAPIStreamEvents.REASONING_SUMMARY_TEXT_DELTA:
|
||||
elif isinstance(event, LLMResponseReasoningSummaryTextDeltaEvent):
|
||||
# OpenAI can output several reasoning summaries
|
||||
# in a single ResponseReasoningItem. We split them as separate
|
||||
# AssistantContent messages. Only last of them will have
|
||||
@@ -358,14 +354,14 @@ async def _transform_stream( # noqa: C901 - This is complex, but better to have
|
||||
last_summary_index = event.summary_index
|
||||
yield {"thinking_content": event.delta}
|
||||
|
||||
elif event_type == ResponsesAPIStreamEvents.FUNCTION_CALL_ARGUMENTS_DELTA:
|
||||
elif isinstance(event, LLMResponseFunctionCallArgumentsDeltaEvent):
|
||||
if current_tool_call is not None:
|
||||
current_tool_call.arguments += event.delta
|
||||
|
||||
elif event_type == ResponsesAPIStreamEvents.WEB_SEARCH_CALL_SEARCHING:
|
||||
elif isinstance(event, LLMResponseWebSearchCallSearchingEvent):
|
||||
yield {"role": "assistant"}
|
||||
|
||||
elif event_type == ResponsesAPIStreamEvents.FUNCTION_CALL_ARGUMENTS_DONE:
|
||||
elif isinstance(event, LLMResponseFunctionCallArgumentsDoneEvent):
|
||||
if current_tool_call is not None:
|
||||
current_tool_call.status = "completed"
|
||||
|
||||
@@ -385,35 +381,36 @@ async def _transform_stream( # noqa: C901 - This is complex, but better to have
|
||||
]
|
||||
}
|
||||
|
||||
elif event_type == ResponsesAPIStreamEvents.RESPONSE_COMPLETED:
|
||||
if event.response.usage is not None:
|
||||
elif isinstance(event, LLMResponseCompletedEvent):
|
||||
response = event.response
|
||||
if response and "usage" in response:
|
||||
usage = response["usage"]
|
||||
chat_log.async_trace(
|
||||
{
|
||||
"stats": {
|
||||
"input_tokens": event.response.usage.input_tokens,
|
||||
"output_tokens": event.response.usage.output_tokens,
|
||||
"input_tokens": usage.get("input_tokens"),
|
||||
"output_tokens": usage.get("output_tokens"),
|
||||
}
|
||||
}
|
||||
)
|
||||
|
||||
elif event_type == ResponsesAPIStreamEvents.RESPONSE_INCOMPLETE:
|
||||
if event.response.usage is not None:
|
||||
elif isinstance(event, LLMResponseIncompleteEvent):
|
||||
response = event.response
|
||||
if response and "usage" in response:
|
||||
usage = response["usage"]
|
||||
chat_log.async_trace(
|
||||
{
|
||||
"stats": {
|
||||
"input_tokens": event.response.usage.input_tokens,
|
||||
"output_tokens": event.response.usage.output_tokens,
|
||||
"input_tokens": usage.get("input_tokens"),
|
||||
"output_tokens": usage.get("output_tokens"),
|
||||
}
|
||||
}
|
||||
)
|
||||
|
||||
if (
|
||||
event.response.incomplete_details
|
||||
and event.response.incomplete_details.reason
|
||||
):
|
||||
reason: str = event.response.incomplete_details.reason
|
||||
else:
|
||||
incomplete_details = response.get("incomplete_details")
|
||||
reason = "unknown reason"
|
||||
if incomplete_details is not None and incomplete_details.get("reason"):
|
||||
reason = incomplete_details["reason"]
|
||||
|
||||
if reason == "max_output_tokens":
|
||||
reason = "max output tokens reached"
|
||||
@@ -422,22 +419,24 @@ async def _transform_stream( # noqa: C901 - This is complex, but better to have
|
||||
|
||||
raise HomeAssistantError(f"OpenAI response incomplete: {reason}")
|
||||
|
||||
elif event_type == ResponsesAPIStreamEvents.RESPONSE_FAILED:
|
||||
if event.response.usage is not None:
|
||||
elif isinstance(event, LLMResponseFailedEvent):
|
||||
response = event.response
|
||||
if response and "usage" in response:
|
||||
usage = response["usage"]
|
||||
chat_log.async_trace(
|
||||
{
|
||||
"stats": {
|
||||
"input_tokens": event.response.usage.input_tokens,
|
||||
"output_tokens": event.response.usage.output_tokens,
|
||||
"input_tokens": usage.get("input_tokens"),
|
||||
"output_tokens": usage.get("output_tokens"),
|
||||
}
|
||||
}
|
||||
)
|
||||
reason = "unknown reason"
|
||||
if event.response.error is not None:
|
||||
reason = event.response.error.message
|
||||
if isinstance(error := response.get("error"), dict):
|
||||
reason = error.get("message") or reason
|
||||
raise HomeAssistantError(f"OpenAI response failed: {reason}")
|
||||
|
||||
elif event_type == ResponsesAPIStreamEvents.ERROR:
|
||||
elif isinstance(event, LLMResponseErrorEvent):
|
||||
raise HomeAssistantError(f"OpenAI response error: {event.message}")
|
||||
|
||||
|
||||
@@ -452,7 +451,7 @@ class BaseCloudLLMEntity(Entity):
|
||||
async def _prepare_chat_for_generation(
|
||||
self,
|
||||
chat_log: conversation.ChatLog,
|
||||
messages: ResponseInputParam,
|
||||
messages: list[ResponseInputItemParam],
|
||||
response_format: dict[str, Any] | None = None,
|
||||
) -> dict[str, Any]:
|
||||
"""Prepare kwargs for Cloud LLM from the chat log."""
|
||||
|
||||
@@ -13,6 +13,6 @@
|
||||
"integration_type": "system",
|
||||
"iot_class": "cloud_push",
|
||||
"loggers": ["acme", "hass_nabucasa", "snitun"],
|
||||
"requirements": ["hass-nabucasa==1.11.0"],
|
||||
"requirements": ["hass-nabucasa==1.12.0", "openai==2.15.0"],
|
||||
"single_config_entry": true
|
||||
}
|
||||
|
||||
@@ -36,7 +36,7 @@ fnv-hash-fast==1.6.0
|
||||
go2rtc-client==0.4.0
|
||||
ha-ffmpeg==3.2.2
|
||||
habluetooth==5.8.0
|
||||
hass-nabucasa==1.11.0
|
||||
hass-nabucasa==1.12.0
|
||||
hassil==3.5.0
|
||||
home-assistant-bluetooth==1.13.1
|
||||
home-assistant-frontend==20260128.1
|
||||
@@ -46,6 +46,7 @@ ifaddr==0.2.0
|
||||
Jinja2==3.1.6
|
||||
lru-dict==1.3.0
|
||||
mutagen==1.47.0
|
||||
openai==2.15.0
|
||||
orjson==3.11.5
|
||||
packaging>=23.1
|
||||
paho-mqtt==2.1.0
|
||||
|
||||
@@ -48,7 +48,7 @@ dependencies = [
|
||||
"fnv-hash-fast==1.6.0",
|
||||
# hass-nabucasa is imported by helpers which don't depend on the cloud
|
||||
# integration
|
||||
"hass-nabucasa==1.11.0",
|
||||
"hass-nabucasa==1.12.0",
|
||||
# When bumping httpx, please check the version pins of
|
||||
# httpcore, anyio, and h11 in gen_requirements_all
|
||||
"httpx==0.28.1",
|
||||
|
||||
2
requirements.txt
generated
2
requirements.txt
generated
@@ -24,7 +24,7 @@ cronsim==2.7
|
||||
cryptography==46.0.2
|
||||
fnv-hash-fast==1.6.0
|
||||
ha-ffmpeg==3.2.2
|
||||
hass-nabucasa==1.11.0
|
||||
hass-nabucasa==1.12.0
|
||||
hassil==3.5.0
|
||||
home-assistant-bluetooth==1.13.1
|
||||
home-assistant-intents==2026.1.6
|
||||
|
||||
3
requirements_all.txt
generated
3
requirements_all.txt
generated
@@ -1175,7 +1175,7 @@ habluetooth==5.8.0
|
||||
hanna-cloud==0.0.7
|
||||
|
||||
# homeassistant.components.cloud
|
||||
hass-nabucasa==1.11.0
|
||||
hass-nabucasa==1.12.0
|
||||
|
||||
# homeassistant.components.splunk
|
||||
hass-splunk==0.1.1
|
||||
@@ -1664,6 +1664,7 @@ open-garage==0.2.0
|
||||
# homeassistant.components.open_meteo
|
||||
open-meteo==0.3.2
|
||||
|
||||
# homeassistant.components.cloud
|
||||
# homeassistant.components.open_router
|
||||
# homeassistant.components.openai_conversation
|
||||
openai==2.15.0
|
||||
|
||||
3
requirements_test_all.txt
generated
3
requirements_test_all.txt
generated
@@ -1045,7 +1045,7 @@ habluetooth==5.8.0
|
||||
hanna-cloud==0.0.7
|
||||
|
||||
# homeassistant.components.cloud
|
||||
hass-nabucasa==1.11.0
|
||||
hass-nabucasa==1.12.0
|
||||
|
||||
# homeassistant.components.assist_satellite
|
||||
# homeassistant.components.conversation
|
||||
@@ -1447,6 +1447,7 @@ open-garage==0.2.0
|
||||
# homeassistant.components.open_meteo
|
||||
open-meteo==0.3.2
|
||||
|
||||
# homeassistant.components.cloud
|
||||
# homeassistant.components.open_router
|
||||
# homeassistant.components.openai_conversation
|
||||
openai==2.15.0
|
||||
|
||||
Reference in New Issue
Block a user