mirror of
https://github.com/home-assistant/core.git
synced 2025-12-24 21:06:19 +00:00
Add tools in default agent also in fallback pipeline (#157441)
This commit is contained in:
@@ -1123,63 +1123,6 @@ class PipelineRun:
|
||||
)
|
||||
|
||||
try:
|
||||
user_input = conversation.ConversationInput(
|
||||
text=intent_input,
|
||||
context=self.context,
|
||||
conversation_id=conversation_id,
|
||||
device_id=self._device_id,
|
||||
satellite_id=self._satellite_id,
|
||||
language=input_language,
|
||||
agent_id=self.intent_agent.id,
|
||||
extra_system_prompt=conversation_extra_system_prompt,
|
||||
)
|
||||
|
||||
agent_id = self.intent_agent.id
|
||||
processed_locally = agent_id == conversation.HOME_ASSISTANT_AGENT
|
||||
all_targets_in_satellite_area = False
|
||||
intent_response: intent.IntentResponse | None = None
|
||||
if not processed_locally and not self._intent_agent_only:
|
||||
# Sentence triggers override conversation agent
|
||||
if (
|
||||
trigger_response_text
|
||||
:= await conversation.async_handle_sentence_triggers(
|
||||
self.hass, user_input
|
||||
)
|
||||
) is not None:
|
||||
# Sentence trigger matched
|
||||
agent_id = "sentence_trigger"
|
||||
processed_locally = True
|
||||
intent_response = intent.IntentResponse(
|
||||
self.pipeline.conversation_language
|
||||
)
|
||||
intent_response.async_set_speech(trigger_response_text)
|
||||
|
||||
intent_filter: Callable[[RecognizeResult], bool] | None = None
|
||||
# If the LLM has API access, we filter out some sentences that are
|
||||
# interfering with LLM operation.
|
||||
if (
|
||||
intent_agent_state := self.hass.states.get(self.intent_agent.id)
|
||||
) and intent_agent_state.attributes.get(
|
||||
ATTR_SUPPORTED_FEATURES, 0
|
||||
) & conversation.ConversationEntityFeature.CONTROL:
|
||||
intent_filter = _async_local_fallback_intent_filter
|
||||
|
||||
# Try local intents
|
||||
if (
|
||||
intent_response is None
|
||||
and self.pipeline.prefer_local_intents
|
||||
and (
|
||||
intent_response := await conversation.async_handle_intents(
|
||||
self.hass,
|
||||
user_input,
|
||||
intent_filter=intent_filter,
|
||||
)
|
||||
)
|
||||
):
|
||||
# Local intent matched
|
||||
agent_id = conversation.HOME_ASSISTANT_AGENT
|
||||
processed_locally = True
|
||||
|
||||
if self.tts_stream and self.tts_stream.supports_streaming_input:
|
||||
tts_input_stream: asyncio.Queue[str | None] | None = asyncio.Queue()
|
||||
else:
|
||||
@@ -1265,6 +1208,17 @@ class PipelineRun:
|
||||
assert self.tts_stream is not None
|
||||
self.tts_stream.async_set_message_stream(tts_input_stream_generator())
|
||||
|
||||
user_input = conversation.ConversationInput(
|
||||
text=intent_input,
|
||||
context=self.context,
|
||||
conversation_id=conversation_id,
|
||||
device_id=self._device_id,
|
||||
satellite_id=self._satellite_id,
|
||||
language=input_language,
|
||||
agent_id=self.intent_agent.id,
|
||||
extra_system_prompt=conversation_extra_system_prompt,
|
||||
)
|
||||
|
||||
with (
|
||||
chat_session.async_get_chat_session(
|
||||
self.hass, user_input.conversation_id
|
||||
@@ -1276,6 +1230,53 @@ class PipelineRun:
|
||||
chat_log_delta_listener=chat_log_delta_listener,
|
||||
) as chat_log,
|
||||
):
|
||||
agent_id = self.intent_agent.id
|
||||
processed_locally = agent_id == conversation.HOME_ASSISTANT_AGENT
|
||||
all_targets_in_satellite_area = False
|
||||
intent_response: intent.IntentResponse | None = None
|
||||
if not processed_locally and not self._intent_agent_only:
|
||||
# Sentence triggers override conversation agent
|
||||
if (
|
||||
trigger_response_text
|
||||
:= await conversation.async_handle_sentence_triggers(
|
||||
self.hass, user_input, chat_log
|
||||
)
|
||||
) is not None:
|
||||
# Sentence trigger matched
|
||||
agent_id = "sentence_trigger"
|
||||
processed_locally = True
|
||||
intent_response = intent.IntentResponse(
|
||||
self.pipeline.conversation_language
|
||||
)
|
||||
intent_response.async_set_speech(trigger_response_text)
|
||||
|
||||
intent_filter: Callable[[RecognizeResult], bool] | None = None
|
||||
# If the LLM has API access, we filter out some sentences that are
|
||||
# interfering with LLM operation.
|
||||
if (
|
||||
intent_agent_state := self.hass.states.get(self.intent_agent.id)
|
||||
) and intent_agent_state.attributes.get(
|
||||
ATTR_SUPPORTED_FEATURES, 0
|
||||
) & conversation.ConversationEntityFeature.CONTROL:
|
||||
intent_filter = _async_local_fallback_intent_filter
|
||||
|
||||
# Try local intents
|
||||
if (
|
||||
intent_response is None
|
||||
and self.pipeline.prefer_local_intents
|
||||
and (
|
||||
intent_response := await conversation.async_handle_intents(
|
||||
self.hass,
|
||||
user_input,
|
||||
chat_log,
|
||||
intent_filter=intent_filter,
|
||||
)
|
||||
)
|
||||
):
|
||||
# Local intent matched
|
||||
agent_id = conversation.HOME_ASSISTANT_AGENT
|
||||
processed_locally = True
|
||||
|
||||
# It was already handled, create response and add to chat history
|
||||
if intent_response is not None:
|
||||
speech: str = intent_response.speech.get("plain", {}).get(
|
||||
|
||||
@@ -236,7 +236,9 @@ async def async_prepare_agent(
|
||||
|
||||
|
||||
async def async_handle_sentence_triggers(
|
||||
hass: HomeAssistant, user_input: ConversationInput
|
||||
hass: HomeAssistant,
|
||||
user_input: ConversationInput,
|
||||
chat_log: ChatLog,
|
||||
) -> str | None:
|
||||
"""Try to match input against sentence triggers and return response text.
|
||||
|
||||
@@ -245,12 +247,13 @@ async def async_handle_sentence_triggers(
|
||||
agent = get_agent_manager(hass).default_agent
|
||||
assert agent is not None
|
||||
|
||||
return await agent.async_handle_sentence_triggers(user_input)
|
||||
return await agent.async_handle_sentence_triggers(user_input, chat_log)
|
||||
|
||||
|
||||
async def async_handle_intents(
|
||||
hass: HomeAssistant,
|
||||
user_input: ConversationInput,
|
||||
chat_log: ChatLog,
|
||||
*,
|
||||
intent_filter: Callable[[RecognizeResult], bool] | None = None,
|
||||
) -> intent.IntentResponse | None:
|
||||
@@ -261,7 +264,9 @@ async def async_handle_intents(
|
||||
agent = get_agent_manager(hass).default_agent
|
||||
assert agent is not None
|
||||
|
||||
return await agent.async_handle_intents(user_input, intent_filter=intent_filter)
|
||||
return await agent.async_handle_intents(
|
||||
user_input, chat_log, intent_filter=intent_filter
|
||||
)
|
||||
|
||||
|
||||
async def async_setup(hass: HomeAssistant, config: ConfigType) -> bool:
|
||||
|
||||
@@ -431,26 +431,14 @@ class DefaultAgent(ConversationEntity):
|
||||
) -> ConversationResult:
|
||||
"""Handle a message."""
|
||||
response: intent.IntentResponse | None = None
|
||||
tool_input: llm.ToolInput | None = None
|
||||
tool_result: dict[str, Any] = {}
|
||||
|
||||
# Check if a trigger matched
|
||||
if trigger_result := await self.async_recognize_sentence_trigger(user_input):
|
||||
# Process callbacks and get response
|
||||
response_text = await self._handle_trigger_result(
|
||||
trigger_result, user_input
|
||||
trigger_result, user_input, chat_log
|
||||
)
|
||||
|
||||
# Create tool result
|
||||
tool_input = llm.ToolInput(
|
||||
tool_name="trigger_sentence",
|
||||
tool_args={},
|
||||
external=True,
|
||||
)
|
||||
tool_result = {
|
||||
"response": response_text,
|
||||
}
|
||||
|
||||
# Convert to conversation result
|
||||
response = intent.IntentResponse(
|
||||
language=user_input.language or self.hass.config.language
|
||||
@@ -462,40 +450,7 @@ class DefaultAgent(ConversationEntity):
|
||||
intent_result = await self.async_recognize_intent(user_input)
|
||||
|
||||
response = await self._async_process_intent_result(
|
||||
intent_result, user_input
|
||||
)
|
||||
|
||||
if response.response_type != intent.IntentResponseType.ERROR:
|
||||
assert intent_result is not None
|
||||
assert intent_result.intent is not None
|
||||
# Create external tool call for the intent
|
||||
tool_input = llm.ToolInput(
|
||||
tool_name=intent_result.intent.name,
|
||||
tool_args={
|
||||
entity.name: entity.value or entity.text
|
||||
for entity in intent_result.entities_list
|
||||
},
|
||||
external=True,
|
||||
)
|
||||
# Create tool result from intent response
|
||||
tool_result = llm.IntentResponseDict(response)
|
||||
|
||||
# Add tool call and result to chat log if we have one
|
||||
if tool_input is not None:
|
||||
chat_log.async_add_assistant_content_without_tools(
|
||||
AssistantContent(
|
||||
agent_id=user_input.agent_id,
|
||||
content=None,
|
||||
tool_calls=[tool_input],
|
||||
)
|
||||
)
|
||||
chat_log.async_add_assistant_content_without_tools(
|
||||
ToolResultContent(
|
||||
agent_id=user_input.agent_id,
|
||||
tool_call_id=tool_input.id,
|
||||
tool_name=tool_input.tool_name,
|
||||
tool_result=tool_result,
|
||||
)
|
||||
intent_result, user_input, chat_log
|
||||
)
|
||||
|
||||
speech: str = response.speech.get("plain", {}).get("speech", "")
|
||||
@@ -514,6 +469,7 @@ class DefaultAgent(ConversationEntity):
|
||||
self,
|
||||
result: RecognizeResult | None,
|
||||
user_input: ConversationInput,
|
||||
chat_log: ChatLog,
|
||||
) -> intent.IntentResponse:
|
||||
"""Process user input with intents."""
|
||||
language = user_input.language or self.hass.config.language
|
||||
@@ -576,12 +532,21 @@ class DefaultAgent(ConversationEntity):
|
||||
ConversationTraceEventType.TOOL_CALL,
|
||||
{
|
||||
"intent_name": result.intent.name,
|
||||
"slots": {
|
||||
entity.name: entity.value or entity.text
|
||||
for entity in result.entities_list
|
||||
},
|
||||
"slots": {entity.name: entity.value for entity in result.entities_list},
|
||||
},
|
||||
)
|
||||
tool_input = llm.ToolInput(
|
||||
tool_name=result.intent.name,
|
||||
tool_args={entity.name: entity.value for entity in result.entities_list},
|
||||
external=True,
|
||||
)
|
||||
chat_log.async_add_assistant_content_without_tools(
|
||||
AssistantContent(
|
||||
agent_id=user_input.agent_id,
|
||||
content=None,
|
||||
tool_calls=[tool_input],
|
||||
)
|
||||
)
|
||||
|
||||
try:
|
||||
intent_response = await intent.async_handle(
|
||||
@@ -644,6 +609,16 @@ class DefaultAgent(ConversationEntity):
|
||||
)
|
||||
intent_response.async_set_speech(speech)
|
||||
|
||||
tool_result = llm.IntentResponseDict(intent_response)
|
||||
chat_log.async_add_assistant_content_without_tools(
|
||||
ToolResultContent(
|
||||
agent_id=user_input.agent_id,
|
||||
tool_call_id=tool_input.id,
|
||||
tool_name=tool_input.tool_name,
|
||||
tool_result=tool_result,
|
||||
)
|
||||
)
|
||||
|
||||
return intent_response
|
||||
|
||||
def _recognize(
|
||||
@@ -1570,16 +1545,31 @@ class DefaultAgent(ConversationEntity):
|
||||
)
|
||||
|
||||
async def _handle_trigger_result(
|
||||
self, result: SentenceTriggerResult, user_input: ConversationInput
|
||||
self,
|
||||
result: SentenceTriggerResult,
|
||||
user_input: ConversationInput,
|
||||
chat_log: ChatLog,
|
||||
) -> str:
|
||||
"""Run sentence trigger callbacks and return response text."""
|
||||
|
||||
# Gather callback responses in parallel
|
||||
trigger_callbacks = [
|
||||
self._triggers_details[trigger_id].callback(user_input, trigger_result)
|
||||
for trigger_id, trigger_result in result.matched_triggers.items()
|
||||
]
|
||||
|
||||
tool_input = llm.ToolInput(
|
||||
tool_name="trigger_sentence",
|
||||
tool_args={},
|
||||
external=True,
|
||||
)
|
||||
chat_log.async_add_assistant_content_without_tools(
|
||||
AssistantContent(
|
||||
agent_id=user_input.agent_id,
|
||||
content=None,
|
||||
tool_calls=[tool_input],
|
||||
)
|
||||
)
|
||||
|
||||
# Use first non-empty result as response.
|
||||
#
|
||||
# There may be multiple copies of a trigger running when editing in
|
||||
@@ -1608,23 +1598,38 @@ class DefaultAgent(ConversationEntity):
|
||||
f"component.{DOMAIN}.conversation.agent.done", "Done"
|
||||
)
|
||||
|
||||
tool_result: dict[str, Any] = {"response": response_text}
|
||||
chat_log.async_add_assistant_content_without_tools(
|
||||
ToolResultContent(
|
||||
agent_id=user_input.agent_id,
|
||||
tool_call_id=tool_input.id,
|
||||
tool_name=tool_input.tool_name,
|
||||
tool_result=tool_result,
|
||||
)
|
||||
)
|
||||
|
||||
return response_text
|
||||
|
||||
async def async_handle_sentence_triggers(
|
||||
self, user_input: ConversationInput
|
||||
self,
|
||||
user_input: ConversationInput,
|
||||
chat_log: ChatLog,
|
||||
) -> str | None:
|
||||
"""Try to match input against sentence triggers and return response text.
|
||||
|
||||
Returns None if no match occurred.
|
||||
"""
|
||||
if trigger_result := await self.async_recognize_sentence_trigger(user_input):
|
||||
return await self._handle_trigger_result(trigger_result, user_input)
|
||||
return await self._handle_trigger_result(
|
||||
trigger_result, user_input, chat_log
|
||||
)
|
||||
|
||||
return None
|
||||
|
||||
async def async_handle_intents(
|
||||
self,
|
||||
user_input: ConversationInput,
|
||||
chat_log: ChatLog,
|
||||
*,
|
||||
intent_filter: Callable[[RecognizeResult], bool] | None = None,
|
||||
) -> intent.IntentResponse | None:
|
||||
@@ -1640,7 +1645,7 @@ class DefaultAgent(ConversationEntity):
|
||||
# No error message on failed match
|
||||
return None
|
||||
|
||||
response = await self._async_process_intent_result(result, user_input)
|
||||
response = await self._async_process_intent_result(result, user_input, chat_log)
|
||||
if (
|
||||
response.response_type == intent.IntentResponseType.ERROR
|
||||
and response.error_code
|
||||
|
||||
@@ -3181,13 +3181,17 @@ async def test_handle_intents_with_response_errors(
|
||||
agent_id=None,
|
||||
)
|
||||
|
||||
with patch(
|
||||
"homeassistant.components.conversation.default_agent.DefaultAgent._async_process_intent_result",
|
||||
return_value=default_agent._make_error_result(
|
||||
user_input.language, error_code, "Mock error message"
|
||||
),
|
||||
) as mock_process:
|
||||
response = await agent.async_handle_intents(user_input)
|
||||
with (
|
||||
patch(
|
||||
"homeassistant.components.conversation.default_agent.DefaultAgent._async_process_intent_result",
|
||||
return_value=default_agent._make_error_result(
|
||||
user_input.language, error_code, "Mock error message"
|
||||
),
|
||||
) as mock_process,
|
||||
chat_session.async_get_chat_session(hass) as session,
|
||||
async_get_chat_log(hass, session, user_input) as chat_log,
|
||||
):
|
||||
response = await agent.async_handle_intents(user_input, chat_log)
|
||||
|
||||
assert len(mock_process.mock_calls) == 1
|
||||
|
||||
@@ -3240,9 +3244,11 @@ async def test_handle_intents_filters_results(
|
||||
patch(
|
||||
"homeassistant.components.conversation.default_agent.DefaultAgent._async_process_intent_result",
|
||||
) as mock_process,
|
||||
chat_session.async_get_chat_session(hass) as session,
|
||||
async_get_chat_log(hass, session, user_input) as chat_log,
|
||||
):
|
||||
response = await agent.async_handle_intents(
|
||||
user_input, intent_filter=_filter_intents
|
||||
user_input, chat_log, intent_filter=_filter_intents
|
||||
)
|
||||
|
||||
assert len(mock_recognize.mock_calls) == 1
|
||||
@@ -3257,7 +3263,7 @@ async def test_handle_intents_filters_results(
|
||||
|
||||
# Second time it is not filtered
|
||||
response = await agent.async_handle_intents(
|
||||
user_input, intent_filter=_filter_intents
|
||||
user_input, chat_log, intent_filter=_filter_intents
|
||||
)
|
||||
|
||||
assert len(mock_recognize.mock_calls) == 2
|
||||
|
||||
@@ -11,6 +11,7 @@ from homeassistant.components import conversation
|
||||
from homeassistant.components.conversation import (
|
||||
ConversationInput,
|
||||
async_get_agent,
|
||||
async_get_chat_log,
|
||||
async_handle_intents,
|
||||
async_handle_sentence_triggers,
|
||||
default_agent,
|
||||
@@ -19,7 +20,7 @@ from homeassistant.components.conversation.const import HOME_ASSISTANT_AGENT
|
||||
from homeassistant.components.light import DOMAIN as LIGHT_DOMAIN
|
||||
from homeassistant.core import Context, HomeAssistant
|
||||
from homeassistant.exceptions import HomeAssistantError
|
||||
from homeassistant.helpers import intent
|
||||
from homeassistant.helpers import chat_session, intent
|
||||
from homeassistant.setup import async_setup_component
|
||||
|
||||
from . import MockAgent
|
||||
@@ -288,18 +289,22 @@ async def test_async_handle_sentence_triggers(
|
||||
|
||||
# Device id will be available in response template
|
||||
device_id = "1234"
|
||||
actual_response = await async_handle_sentence_triggers(
|
||||
hass,
|
||||
ConversationInput(
|
||||
text="my trigger",
|
||||
context=Context(),
|
||||
conversation_id=None,
|
||||
agent_id=conversation.HOME_ASSISTANT_AGENT,
|
||||
device_id=device_id,
|
||||
satellite_id=None,
|
||||
language=hass.config.language,
|
||||
),
|
||||
user_input = ConversationInput(
|
||||
text="my trigger",
|
||||
context=Context(),
|
||||
conversation_id=None,
|
||||
agent_id=conversation.HOME_ASSISTANT_AGENT,
|
||||
device_id=device_id,
|
||||
satellite_id=None,
|
||||
language=hass.config.language,
|
||||
)
|
||||
with (
|
||||
chat_session.async_get_chat_session(hass) as session,
|
||||
async_get_chat_log(hass, session, user_input) as chat_log,
|
||||
):
|
||||
actual_response = await async_handle_sentence_triggers(
|
||||
hass, user_input, chat_log
|
||||
)
|
||||
assert actual_response == expected_response
|
||||
|
||||
|
||||
@@ -326,34 +331,38 @@ async def test_async_handle_intents(hass: HomeAssistant) -> None:
|
||||
intent.async_register(hass, handler)
|
||||
|
||||
# Registered intent will be handled
|
||||
result = await async_handle_intents(
|
||||
hass,
|
||||
ConversationInput(
|
||||
text="I'd like to order a stout",
|
||||
context=Context(),
|
||||
agent_id=conversation.HOME_ASSISTANT_AGENT,
|
||||
conversation_id=None,
|
||||
device_id=None,
|
||||
satellite_id=None,
|
||||
language=hass.config.language,
|
||||
),
|
||||
user_input = ConversationInput(
|
||||
text="I'd like to order a stout",
|
||||
context=Context(),
|
||||
agent_id=conversation.HOME_ASSISTANT_AGENT,
|
||||
conversation_id=None,
|
||||
device_id=None,
|
||||
satellite_id=None,
|
||||
language=hass.config.language,
|
||||
)
|
||||
with (
|
||||
chat_session.async_get_chat_session(hass) as session,
|
||||
async_get_chat_log(hass, session, user_input) as chat_log,
|
||||
):
|
||||
result = await async_handle_intents(hass, user_input, chat_log)
|
||||
assert result is not None
|
||||
assert result.intent is not None
|
||||
assert result.intent.intent_type == handler.intent_type
|
||||
assert handler.was_handled
|
||||
|
||||
# No error messages, just None as a result
|
||||
result = await async_handle_intents(
|
||||
hass,
|
||||
ConversationInput(
|
||||
text="this sentence does not exist",
|
||||
agent_id=conversation.HOME_ASSISTANT_AGENT,
|
||||
context=Context(),
|
||||
conversation_id=None,
|
||||
device_id=None,
|
||||
satellite_id=None,
|
||||
language=hass.config.language,
|
||||
),
|
||||
user_input2 = ConversationInput(
|
||||
text="this sentence does not exist",
|
||||
agent_id=conversation.HOME_ASSISTANT_AGENT,
|
||||
context=Context(),
|
||||
conversation_id=None,
|
||||
device_id=None,
|
||||
satellite_id=None,
|
||||
language=hass.config.language,
|
||||
)
|
||||
with (
|
||||
chat_session.async_get_chat_session(hass) as session,
|
||||
async_get_chat_log(hass, session, user_input2) as chat_log,
|
||||
):
|
||||
result = await async_handle_intents(hass, user_input2, chat_log)
|
||||
assert result is None
|
||||
|
||||
Reference in New Issue
Block a user