1
0
mirror of https://github.com/home-assistant/core.git synced 2025-12-24 21:06:19 +00:00

Add tools in default agent also in fallback pipeline (#157441)

This commit is contained in:
Artur Pragacz
2025-11-28 09:47:52 +01:00
committed by GitHub
parent ef36d7b1e5
commit 9f3dae6254
5 changed files with 185 additions and 159 deletions

View File

@@ -1123,63 +1123,6 @@ class PipelineRun:
)
try:
user_input = conversation.ConversationInput(
text=intent_input,
context=self.context,
conversation_id=conversation_id,
device_id=self._device_id,
satellite_id=self._satellite_id,
language=input_language,
agent_id=self.intent_agent.id,
extra_system_prompt=conversation_extra_system_prompt,
)
agent_id = self.intent_agent.id
processed_locally = agent_id == conversation.HOME_ASSISTANT_AGENT
all_targets_in_satellite_area = False
intent_response: intent.IntentResponse | None = None
if not processed_locally and not self._intent_agent_only:
# Sentence triggers override conversation agent
if (
trigger_response_text
:= await conversation.async_handle_sentence_triggers(
self.hass, user_input
)
) is not None:
# Sentence trigger matched
agent_id = "sentence_trigger"
processed_locally = True
intent_response = intent.IntentResponse(
self.pipeline.conversation_language
)
intent_response.async_set_speech(trigger_response_text)
intent_filter: Callable[[RecognizeResult], bool] | None = None
# If the LLM has API access, we filter out some sentences that are
# interfering with LLM operation.
if (
intent_agent_state := self.hass.states.get(self.intent_agent.id)
) and intent_agent_state.attributes.get(
ATTR_SUPPORTED_FEATURES, 0
) & conversation.ConversationEntityFeature.CONTROL:
intent_filter = _async_local_fallback_intent_filter
# Try local intents
if (
intent_response is None
and self.pipeline.prefer_local_intents
and (
intent_response := await conversation.async_handle_intents(
self.hass,
user_input,
intent_filter=intent_filter,
)
)
):
# Local intent matched
agent_id = conversation.HOME_ASSISTANT_AGENT
processed_locally = True
if self.tts_stream and self.tts_stream.supports_streaming_input:
tts_input_stream: asyncio.Queue[str | None] | None = asyncio.Queue()
else:
@@ -1265,6 +1208,17 @@ class PipelineRun:
assert self.tts_stream is not None
self.tts_stream.async_set_message_stream(tts_input_stream_generator())
user_input = conversation.ConversationInput(
text=intent_input,
context=self.context,
conversation_id=conversation_id,
device_id=self._device_id,
satellite_id=self._satellite_id,
language=input_language,
agent_id=self.intent_agent.id,
extra_system_prompt=conversation_extra_system_prompt,
)
with (
chat_session.async_get_chat_session(
self.hass, user_input.conversation_id
@@ -1276,6 +1230,53 @@ class PipelineRun:
chat_log_delta_listener=chat_log_delta_listener,
) as chat_log,
):
agent_id = self.intent_agent.id
processed_locally = agent_id == conversation.HOME_ASSISTANT_AGENT
all_targets_in_satellite_area = False
intent_response: intent.IntentResponse | None = None
if not processed_locally and not self._intent_agent_only:
# Sentence triggers override conversation agent
if (
trigger_response_text
:= await conversation.async_handle_sentence_triggers(
self.hass, user_input, chat_log
)
) is not None:
# Sentence trigger matched
agent_id = "sentence_trigger"
processed_locally = True
intent_response = intent.IntentResponse(
self.pipeline.conversation_language
)
intent_response.async_set_speech(trigger_response_text)
intent_filter: Callable[[RecognizeResult], bool] | None = None
# If the LLM has API access, we filter out some sentences that are
# interfering with LLM operation.
if (
intent_agent_state := self.hass.states.get(self.intent_agent.id)
) and intent_agent_state.attributes.get(
ATTR_SUPPORTED_FEATURES, 0
) & conversation.ConversationEntityFeature.CONTROL:
intent_filter = _async_local_fallback_intent_filter
# Try local intents
if (
intent_response is None
and self.pipeline.prefer_local_intents
and (
intent_response := await conversation.async_handle_intents(
self.hass,
user_input,
chat_log,
intent_filter=intent_filter,
)
)
):
# Local intent matched
agent_id = conversation.HOME_ASSISTANT_AGENT
processed_locally = True
# It was already handled, create response and add to chat history
if intent_response is not None:
speech: str = intent_response.speech.get("plain", {}).get(

View File

@@ -236,7 +236,9 @@ async def async_prepare_agent(
async def async_handle_sentence_triggers(
hass: HomeAssistant, user_input: ConversationInput
hass: HomeAssistant,
user_input: ConversationInput,
chat_log: ChatLog,
) -> str | None:
"""Try to match input against sentence triggers and return response text.
@@ -245,12 +247,13 @@ async def async_handle_sentence_triggers(
agent = get_agent_manager(hass).default_agent
assert agent is not None
return await agent.async_handle_sentence_triggers(user_input)
return await agent.async_handle_sentence_triggers(user_input, chat_log)
async def async_handle_intents(
hass: HomeAssistant,
user_input: ConversationInput,
chat_log: ChatLog,
*,
intent_filter: Callable[[RecognizeResult], bool] | None = None,
) -> intent.IntentResponse | None:
@@ -261,7 +264,9 @@ async def async_handle_intents(
agent = get_agent_manager(hass).default_agent
assert agent is not None
return await agent.async_handle_intents(user_input, intent_filter=intent_filter)
return await agent.async_handle_intents(
user_input, chat_log, intent_filter=intent_filter
)
async def async_setup(hass: HomeAssistant, config: ConfigType) -> bool:

View File

@@ -431,26 +431,14 @@ class DefaultAgent(ConversationEntity):
) -> ConversationResult:
"""Handle a message."""
response: intent.IntentResponse | None = None
tool_input: llm.ToolInput | None = None
tool_result: dict[str, Any] = {}
# Check if a trigger matched
if trigger_result := await self.async_recognize_sentence_trigger(user_input):
# Process callbacks and get response
response_text = await self._handle_trigger_result(
trigger_result, user_input
trigger_result, user_input, chat_log
)
# Create tool result
tool_input = llm.ToolInput(
tool_name="trigger_sentence",
tool_args={},
external=True,
)
tool_result = {
"response": response_text,
}
# Convert to conversation result
response = intent.IntentResponse(
language=user_input.language or self.hass.config.language
@@ -462,40 +450,7 @@ class DefaultAgent(ConversationEntity):
intent_result = await self.async_recognize_intent(user_input)
response = await self._async_process_intent_result(
intent_result, user_input
)
if response.response_type != intent.IntentResponseType.ERROR:
assert intent_result is not None
assert intent_result.intent is not None
# Create external tool call for the intent
tool_input = llm.ToolInput(
tool_name=intent_result.intent.name,
tool_args={
entity.name: entity.value or entity.text
for entity in intent_result.entities_list
},
external=True,
)
# Create tool result from intent response
tool_result = llm.IntentResponseDict(response)
# Add tool call and result to chat log if we have one
if tool_input is not None:
chat_log.async_add_assistant_content_without_tools(
AssistantContent(
agent_id=user_input.agent_id,
content=None,
tool_calls=[tool_input],
)
)
chat_log.async_add_assistant_content_without_tools(
ToolResultContent(
agent_id=user_input.agent_id,
tool_call_id=tool_input.id,
tool_name=tool_input.tool_name,
tool_result=tool_result,
)
intent_result, user_input, chat_log
)
speech: str = response.speech.get("plain", {}).get("speech", "")
@@ -514,6 +469,7 @@ class DefaultAgent(ConversationEntity):
self,
result: RecognizeResult | None,
user_input: ConversationInput,
chat_log: ChatLog,
) -> intent.IntentResponse:
"""Process user input with intents."""
language = user_input.language or self.hass.config.language
@@ -576,12 +532,21 @@ class DefaultAgent(ConversationEntity):
ConversationTraceEventType.TOOL_CALL,
{
"intent_name": result.intent.name,
"slots": {
entity.name: entity.value or entity.text
for entity in result.entities_list
},
"slots": {entity.name: entity.value for entity in result.entities_list},
},
)
tool_input = llm.ToolInput(
tool_name=result.intent.name,
tool_args={entity.name: entity.value for entity in result.entities_list},
external=True,
)
chat_log.async_add_assistant_content_without_tools(
AssistantContent(
agent_id=user_input.agent_id,
content=None,
tool_calls=[tool_input],
)
)
try:
intent_response = await intent.async_handle(
@@ -644,6 +609,16 @@ class DefaultAgent(ConversationEntity):
)
intent_response.async_set_speech(speech)
tool_result = llm.IntentResponseDict(intent_response)
chat_log.async_add_assistant_content_without_tools(
ToolResultContent(
agent_id=user_input.agent_id,
tool_call_id=tool_input.id,
tool_name=tool_input.tool_name,
tool_result=tool_result,
)
)
return intent_response
def _recognize(
@@ -1570,16 +1545,31 @@ class DefaultAgent(ConversationEntity):
)
async def _handle_trigger_result(
self, result: SentenceTriggerResult, user_input: ConversationInput
self,
result: SentenceTriggerResult,
user_input: ConversationInput,
chat_log: ChatLog,
) -> str:
"""Run sentence trigger callbacks and return response text."""
# Gather callback responses in parallel
trigger_callbacks = [
self._triggers_details[trigger_id].callback(user_input, trigger_result)
for trigger_id, trigger_result in result.matched_triggers.items()
]
tool_input = llm.ToolInput(
tool_name="trigger_sentence",
tool_args={},
external=True,
)
chat_log.async_add_assistant_content_without_tools(
AssistantContent(
agent_id=user_input.agent_id,
content=None,
tool_calls=[tool_input],
)
)
# Use first non-empty result as response.
#
# There may be multiple copies of a trigger running when editing in
@@ -1608,23 +1598,38 @@ class DefaultAgent(ConversationEntity):
f"component.{DOMAIN}.conversation.agent.done", "Done"
)
tool_result: dict[str, Any] = {"response": response_text}
chat_log.async_add_assistant_content_without_tools(
ToolResultContent(
agent_id=user_input.agent_id,
tool_call_id=tool_input.id,
tool_name=tool_input.tool_name,
tool_result=tool_result,
)
)
return response_text
async def async_handle_sentence_triggers(
self, user_input: ConversationInput
self,
user_input: ConversationInput,
chat_log: ChatLog,
) -> str | None:
"""Try to match input sentence against sentence triggers and return response text.
Returns None if no match occurred.
"""
if trigger_result := await self.async_recognize_sentence_trigger(user_input):
return await self._handle_trigger_result(trigger_result, user_input)
return await self._handle_trigger_result(
trigger_result, user_input, chat_log
)
return None
async def async_handle_intents(
self,
user_input: ConversationInput,
chat_log: ChatLog,
*,
intent_filter: Callable[[RecognizeResult], bool] | None = None,
) -> intent.IntentResponse | None:
@@ -1640,7 +1645,7 @@ class DefaultAgent(ConversationEntity):
# No error message on failed match
return None
response = await self._async_process_intent_result(result, user_input)
response = await self._async_process_intent_result(result, user_input, chat_log)
if (
response.response_type == intent.IntentResponseType.ERROR
and response.error_code

View File

@@ -3181,13 +3181,17 @@ async def test_handle_intents_with_response_errors(
agent_id=None,
)
with patch(
"homeassistant.components.conversation.default_agent.DefaultAgent._async_process_intent_result",
return_value=default_agent._make_error_result(
user_input.language, error_code, "Mock error message"
),
) as mock_process:
response = await agent.async_handle_intents(user_input)
with (
patch(
"homeassistant.components.conversation.default_agent.DefaultAgent._async_process_intent_result",
return_value=default_agent._make_error_result(
user_input.language, error_code, "Mock error message"
),
) as mock_process,
chat_session.async_get_chat_session(hass) as session,
async_get_chat_log(hass, session, user_input) as chat_log,
):
response = await agent.async_handle_intents(user_input, chat_log)
assert len(mock_process.mock_calls) == 1
@@ -3240,9 +3244,11 @@ async def test_handle_intents_filters_results(
patch(
"homeassistant.components.conversation.default_agent.DefaultAgent._async_process_intent_result",
) as mock_process,
chat_session.async_get_chat_session(hass) as session,
async_get_chat_log(hass, session, user_input) as chat_log,
):
response = await agent.async_handle_intents(
user_input, intent_filter=_filter_intents
user_input, chat_log, intent_filter=_filter_intents
)
assert len(mock_recognize.mock_calls) == 1
@@ -3257,7 +3263,7 @@ async def test_handle_intents_filters_results(
# Second time it is not filtered
response = await agent.async_handle_intents(
user_input, intent_filter=_filter_intents
user_input, chat_log, intent_filter=_filter_intents
)
assert len(mock_recognize.mock_calls) == 2

View File

@@ -11,6 +11,7 @@ from homeassistant.components import conversation
from homeassistant.components.conversation import (
ConversationInput,
async_get_agent,
async_get_chat_log,
async_handle_intents,
async_handle_sentence_triggers,
default_agent,
@@ -19,7 +20,7 @@ from homeassistant.components.conversation.const import HOME_ASSISTANT_AGENT
from homeassistant.components.light import DOMAIN as LIGHT_DOMAIN
from homeassistant.core import Context, HomeAssistant
from homeassistant.exceptions import HomeAssistantError
from homeassistant.helpers import intent
from homeassistant.helpers import chat_session, intent
from homeassistant.setup import async_setup_component
from . import MockAgent
@@ -288,18 +289,22 @@ async def test_async_handle_sentence_triggers(
# Device id will be available in response template
device_id = "1234"
actual_response = await async_handle_sentence_triggers(
hass,
ConversationInput(
text="my trigger",
context=Context(),
conversation_id=None,
agent_id=conversation.HOME_ASSISTANT_AGENT,
device_id=device_id,
satellite_id=None,
language=hass.config.language,
),
user_input = ConversationInput(
text="my trigger",
context=Context(),
conversation_id=None,
agent_id=conversation.HOME_ASSISTANT_AGENT,
device_id=device_id,
satellite_id=None,
language=hass.config.language,
)
with (
chat_session.async_get_chat_session(hass) as session,
async_get_chat_log(hass, session, user_input) as chat_log,
):
actual_response = await async_handle_sentence_triggers(
hass, user_input, chat_log
)
assert actual_response == expected_response
@@ -326,34 +331,38 @@ async def test_async_handle_intents(hass: HomeAssistant) -> None:
intent.async_register(hass, handler)
# Registered intent will be handled
result = await async_handle_intents(
hass,
ConversationInput(
text="I'd like to order a stout",
context=Context(),
agent_id=conversation.HOME_ASSISTANT_AGENT,
conversation_id=None,
device_id=None,
satellite_id=None,
language=hass.config.language,
),
user_input = ConversationInput(
text="I'd like to order a stout",
context=Context(),
agent_id=conversation.HOME_ASSISTANT_AGENT,
conversation_id=None,
device_id=None,
satellite_id=None,
language=hass.config.language,
)
with (
chat_session.async_get_chat_session(hass) as session,
async_get_chat_log(hass, session, user_input) as chat_log,
):
result = await async_handle_intents(hass, user_input, chat_log)
assert result is not None
assert result.intent is not None
assert result.intent.intent_type == handler.intent_type
assert handler.was_handled
# No error messages, just None as a result
result = await async_handle_intents(
hass,
ConversationInput(
text="this sentence does not exist",
agent_id=conversation.HOME_ASSISTANT_AGENT,
context=Context(),
conversation_id=None,
device_id=None,
satellite_id=None,
language=hass.config.language,
),
user_input2 = ConversationInput(
text="this sentence does not exist",
agent_id=conversation.HOME_ASSISTANT_AGENT,
context=Context(),
conversation_id=None,
device_id=None,
satellite_id=None,
language=hass.config.language,
)
with (
chat_session.async_get_chat_session(hass) as session,
async_get_chat_log(hass, session, user_input2) as chat_log,
):
result = await async_handle_intents(hass, user_input2, chat_log)
assert result is None