Also add default agent tool calls to the chat log in the fallback pipeline (#157441)

2025-12-24 21:06:19 +00:00 · 2025-11-28 09:47:52 +01:00
parent ef36d7b1e5
commit 9f3dae6254
5 changed files with 185 additions and 159 deletions
--- a/homeassistant/components/assist_pipeline/pipeline.py
+++ b/homeassistant/components/assist_pipeline/pipeline.py
@@ -1123,63 +1123,6 @@ class PipelineRun:
        )

        try:
-            user_input = conversation.ConversationInput(
-                text=intent_input,
-                context=self.context,
-                conversation_id=conversation_id,
-                device_id=self._device_id,
-                satellite_id=self._satellite_id,
-                language=input_language,
-                agent_id=self.intent_agent.id,
-                extra_system_prompt=conversation_extra_system_prompt,
-            )
-
-            agent_id = self.intent_agent.id
-            processed_locally = agent_id == conversation.HOME_ASSISTANT_AGENT
-            all_targets_in_satellite_area = False
-            intent_response: intent.IntentResponse | None = None
-            if not processed_locally and not self._intent_agent_only:
-                # Sentence triggers override conversation agent
-                if (
-                    trigger_response_text
-                    := await conversation.async_handle_sentence_triggers(
-                        self.hass, user_input
-                    )
-                ) is not None:
-                    # Sentence trigger matched
-                    agent_id = "sentence_trigger"
-                    processed_locally = True
-                    intent_response = intent.IntentResponse(
-                        self.pipeline.conversation_language
-                    )
-                    intent_response.async_set_speech(trigger_response_text)
-
-                intent_filter: Callable[[RecognizeResult], bool] | None = None
-                # If the LLM has API access, we filter out some sentences that are
-                # interfering with LLM operation.
-                if (
-                    intent_agent_state := self.hass.states.get(self.intent_agent.id)
-                ) and intent_agent_state.attributes.get(
-                    ATTR_SUPPORTED_FEATURES, 0
-                ) & conversation.ConversationEntityFeature.CONTROL:
-                    intent_filter = _async_local_fallback_intent_filter
-
-                # Try local intents
-                if (
-                    intent_response is None
-                    and self.pipeline.prefer_local_intents
-                    and (
-                        intent_response := await conversation.async_handle_intents(
-                            self.hass,
-                            user_input,
-                            intent_filter=intent_filter,
-                        )
-                    )
-                ):
-                    # Local intent matched
-                    agent_id = conversation.HOME_ASSISTANT_AGENT
-                    processed_locally = True
-
            if self.tts_stream and self.tts_stream.supports_streaming_input:
                tts_input_stream: asyncio.Queue[str | None] | None = asyncio.Queue()
            else:
@@ -1265,6 +1208,17 @@ class PipelineRun:
                assert self.tts_stream is not None
                self.tts_stream.async_set_message_stream(tts_input_stream_generator())

+            user_input = conversation.ConversationInput(
+                text=intent_input,
+                context=self.context,
+                conversation_id=conversation_id,
+                device_id=self._device_id,
+                satellite_id=self._satellite_id,
+                language=input_language,
+                agent_id=self.intent_agent.id,
+                extra_system_prompt=conversation_extra_system_prompt,
+            )
+
            with (
                chat_session.async_get_chat_session(
                    self.hass, user_input.conversation_id
@@ -1276,6 +1230,53 @@ class PipelineRun:
                    chat_log_delta_listener=chat_log_delta_listener,
                ) as chat_log,
            ):
+                agent_id = self.intent_agent.id
+                processed_locally = agent_id == conversation.HOME_ASSISTANT_AGENT
+                all_targets_in_satellite_area = False
+                intent_response: intent.IntentResponse | None = None
+                if not processed_locally and not self._intent_agent_only:
+                    # Sentence triggers override conversation agent
+                    if (
+                        trigger_response_text
+                        := await conversation.async_handle_sentence_triggers(
+                            self.hass, user_input, chat_log
+                        )
+                    ) is not None:
+                        # Sentence trigger matched
+                        agent_id = "sentence_trigger"
+                        processed_locally = True
+                        intent_response = intent.IntentResponse(
+                            self.pipeline.conversation_language
+                        )
+                        intent_response.async_set_speech(trigger_response_text)
+
+                    intent_filter: Callable[[RecognizeResult], bool] | None = None
+                    # If the LLM has API access, we filter out some sentences that are
+                    # interfering with LLM operation.
+                    if (
+                        intent_agent_state := self.hass.states.get(self.intent_agent.id)
+                    ) and intent_agent_state.attributes.get(
+                        ATTR_SUPPORTED_FEATURES, 0
+                    ) & conversation.ConversationEntityFeature.CONTROL:
+                        intent_filter = _async_local_fallback_intent_filter
+
+                    # Try local intents
+                    if (
+                        intent_response is None
+                        and self.pipeline.prefer_local_intents
+                        and (
+                            intent_response := await conversation.async_handle_intents(
+                                self.hass,
+                                user_input,
+                                chat_log,
+                                intent_filter=intent_filter,
+                            )
+                        )
+                    ):
+                        # Local intent matched
+                        agent_id = conversation.HOME_ASSISTANT_AGENT
+                        processed_locally = True
+
                # It was already handled, create response and add to chat history
                if intent_response is not None:
                    speech: str = intent_response.speech.get("plain", {}).get(
--- a/homeassistant/components/conversation/__init__.py
+++ b/homeassistant/components/conversation/__init__.py
@@ -236,7 +236,9 @@ async def async_prepare_agent(


 async def async_handle_sentence_triggers(
-    hass: HomeAssistant, user_input: ConversationInput
+    hass: HomeAssistant,
+    user_input: ConversationInput,
+    chat_log: ChatLog,
 ) -> str | None:
    """Try to match input against sentence triggers and return response text.

@@ -245,12 +247,13 @@ async def async_handle_sentence_triggers(
    agent = get_agent_manager(hass).default_agent
    assert agent is not None

-    return await agent.async_handle_sentence_triggers(user_input)
+    return await agent.async_handle_sentence_triggers(user_input, chat_log)


 async def async_handle_intents(
    hass: HomeAssistant,
    user_input: ConversationInput,
+    chat_log: ChatLog,
    *,
    intent_filter: Callable[[RecognizeResult], bool] | None = None,
 ) -> intent.IntentResponse | None:
@@ -261,7 +264,9 @@ async def async_handle_intents(
    agent = get_agent_manager(hass).default_agent
    assert agent is not None

-    return await agent.async_handle_intents(user_input, intent_filter=intent_filter)
+    return await agent.async_handle_intents(
+        user_input, chat_log, intent_filter=intent_filter
+    )


 async def async_setup(hass: HomeAssistant, config: ConfigType) -> bool:
--- a/homeassistant/components/conversation/default_agent.py
+++ b/homeassistant/components/conversation/default_agent.py
@@ -431,26 +431,14 @@ class DefaultAgent(ConversationEntity):
    ) -> ConversationResult:
        """Handle a message."""
        response: intent.IntentResponse | None = None
-        tool_input: llm.ToolInput | None = None
-        tool_result: dict[str, Any] = {}

        # Check if a trigger matched
        if trigger_result := await self.async_recognize_sentence_trigger(user_input):
            # Process callbacks and get response
            response_text = await self._handle_trigger_result(
-                trigger_result, user_input
+                trigger_result, user_input, chat_log
            )

-            # Create tool result
-            tool_input = llm.ToolInput(
-                tool_name="trigger_sentence",
-                tool_args={},
-                external=True,
-            )
-            tool_result = {
-                "response": response_text,
-            }
-
            # Convert to conversation result
            response = intent.IntentResponse(
                language=user_input.language or self.hass.config.language
@@ -462,40 +450,7 @@ class DefaultAgent(ConversationEntity):
            intent_result = await self.async_recognize_intent(user_input)

            response = await self._async_process_intent_result(
-                intent_result, user_input
-            )
-
-            if response.response_type != intent.IntentResponseType.ERROR:
-                assert intent_result is not None
-                assert intent_result.intent is not None
-                # Create external tool call for the intent
-                tool_input = llm.ToolInput(
-                    tool_name=intent_result.intent.name,
-                    tool_args={
-                        entity.name: entity.value or entity.text
-                        for entity in intent_result.entities_list
-                    },
-                    external=True,
-                )
-                # Create tool result from intent response
-                tool_result = llm.IntentResponseDict(response)
-
-        # Add tool call and result to chat log if we have one
-        if tool_input is not None:
-            chat_log.async_add_assistant_content_without_tools(
-                AssistantContent(
-                    agent_id=user_input.agent_id,
-                    content=None,
-                    tool_calls=[tool_input],
-                )
-            )
-            chat_log.async_add_assistant_content_without_tools(
-                ToolResultContent(
-                    agent_id=user_input.agent_id,
-                    tool_call_id=tool_input.id,
-                    tool_name=tool_input.tool_name,
-                    tool_result=tool_result,
-                )
+                intent_result, user_input, chat_log
            )

        speech: str = response.speech.get("plain", {}).get("speech", "")
@@ -514,6 +469,7 @@ class DefaultAgent(ConversationEntity):
        self,
        result: RecognizeResult | None,
        user_input: ConversationInput,
+        chat_log: ChatLog,
    ) -> intent.IntentResponse:
        """Process user input with intents."""
        language = user_input.language or self.hass.config.language
@@ -576,12 +532,21 @@ class DefaultAgent(ConversationEntity):
            ConversationTraceEventType.TOOL_CALL,
            {
                "intent_name": result.intent.name,
-                "slots": {
-                    entity.name: entity.value or entity.text
-                    for entity in result.entities_list
-                },
+                "slots": {entity.name: entity.value for entity in result.entities_list},
            },
        )
+        tool_input = llm.ToolInput(
+            tool_name=result.intent.name,
+            tool_args={entity.name: entity.value for entity in result.entities_list},
+            external=True,
+        )
+        chat_log.async_add_assistant_content_without_tools(
+            AssistantContent(
+                agent_id=user_input.agent_id,
+                content=None,
+                tool_calls=[tool_input],
+            )
+        )

        try:
            intent_response = await intent.async_handle(
@@ -644,6 +609,16 @@ class DefaultAgent(ConversationEntity):
                )
                intent_response.async_set_speech(speech)

+        tool_result = llm.IntentResponseDict(intent_response)
+        chat_log.async_add_assistant_content_without_tools(
+            ToolResultContent(
+                agent_id=user_input.agent_id,
+                tool_call_id=tool_input.id,
+                tool_name=tool_input.tool_name,
+                tool_result=tool_result,
+            )
+        )
+
        return intent_response

    def _recognize(
@@ -1570,16 +1545,31 @@ class DefaultAgent(ConversationEntity):
        )

    async def _handle_trigger_result(
-        self, result: SentenceTriggerResult, user_input: ConversationInput
+        self,
+        result: SentenceTriggerResult,
+        user_input: ConversationInput,
+        chat_log: ChatLog,
    ) -> str:
        """Run sentence trigger callbacks and return response text."""
-
        # Gather callback responses in parallel
        trigger_callbacks = [
            self._triggers_details[trigger_id].callback(user_input, trigger_result)
            for trigger_id, trigger_result in result.matched_triggers.items()
        ]

+        tool_input = llm.ToolInput(
+            tool_name="trigger_sentence",
+            tool_args={},
+            external=True,
+        )
+        chat_log.async_add_assistant_content_without_tools(
+            AssistantContent(
+                agent_id=user_input.agent_id,
+                content=None,
+                tool_calls=[tool_input],
+            )
+        )
+
        # Use first non-empty result as response.
        #
        # There may be multiple copies of a trigger running when editing in
@@ -1608,23 +1598,38 @@ class DefaultAgent(ConversationEntity):
                f"component.{DOMAIN}.conversation.agent.done", "Done"
            )

+        tool_result: dict[str, Any] = {"response": response_text}
+        chat_log.async_add_assistant_content_without_tools(
+            ToolResultContent(
+                agent_id=user_input.agent_id,
+                tool_call_id=tool_input.id,
+                tool_name=tool_input.tool_name,
+                tool_result=tool_result,
+            )
+        )
+
        return response_text

    async def async_handle_sentence_triggers(
-        self, user_input: ConversationInput
+        self,
+        user_input: ConversationInput,
+        chat_log: ChatLog,
    ) -> str | None:
        """Try to input sentence against sentence triggers and return response text.

        Returns None if no match occurred.
        """
        if trigger_result := await self.async_recognize_sentence_trigger(user_input):
-            return await self._handle_trigger_result(trigger_result, user_input)
+            return await self._handle_trigger_result(
+                trigger_result, user_input, chat_log
+            )

        return None

    async def async_handle_intents(
        self,
        user_input: ConversationInput,
+        chat_log: ChatLog,
        *,
        intent_filter: Callable[[RecognizeResult], bool] | None = None,
    ) -> intent.IntentResponse | None:
@@ -1640,7 +1645,7 @@ class DefaultAgent(ConversationEntity):
            # No error message on failed match
            return None

-        response = await self._async_process_intent_result(result, user_input)
+        response = await self._async_process_intent_result(result, user_input, chat_log)
        if (
            response.response_type == intent.IntentResponseType.ERROR
            and response.error_code
--- a/tests/components/conversation/test_default_agent.py
+++ b/tests/components/conversation/test_default_agent.py
@@ -3181,13 +3181,17 @@ async def test_handle_intents_with_response_errors(
        agent_id=None,
    )

-    with patch(
-        "homeassistant.components.conversation.default_agent.DefaultAgent._async_process_intent_result",
-        return_value=default_agent._make_error_result(
-            user_input.language, error_code, "Mock error message"
-        ),
-    ) as mock_process:
-        response = await agent.async_handle_intents(user_input)
+    with (
+        patch(
+            "homeassistant.components.conversation.default_agent.DefaultAgent._async_process_intent_result",
+            return_value=default_agent._make_error_result(
+                user_input.language, error_code, "Mock error message"
+            ),
+        ) as mock_process,
+        chat_session.async_get_chat_session(hass) as session,
+        async_get_chat_log(hass, session, user_input) as chat_log,
+    ):
+        response = await agent.async_handle_intents(user_input, chat_log)

    assert len(mock_process.mock_calls) == 1

@@ -3240,9 +3244,11 @@ async def test_handle_intents_filters_results(
        patch(
            "homeassistant.components.conversation.default_agent.DefaultAgent._async_process_intent_result",
        ) as mock_process,
+        chat_session.async_get_chat_session(hass) as session,
+        async_get_chat_log(hass, session, user_input) as chat_log,
    ):
        response = await agent.async_handle_intents(
-            user_input, intent_filter=_filter_intents
+            user_input, chat_log, intent_filter=_filter_intents
        )

        assert len(mock_recognize.mock_calls) == 1
@@ -3257,7 +3263,7 @@ async def test_handle_intents_filters_results(

        # Second time it is not filtered
        response = await agent.async_handle_intents(
-            user_input, intent_filter=_filter_intents
+            user_input, chat_log, intent_filter=_filter_intents
        )

        assert len(mock_recognize.mock_calls) == 2
--- a/tests/components/conversation/test_init.py
+++ b/tests/components/conversation/test_init.py
@@ -11,6 +11,7 @@ from homeassistant.components import conversation
 from homeassistant.components.conversation import (
    ConversationInput,
    async_get_agent,
+    async_get_chat_log,
    async_handle_intents,
    async_handle_sentence_triggers,
    default_agent,
@@ -19,7 +20,7 @@ from homeassistant.components.conversation.const import HOME_ASSISTANT_AGENT
 from homeassistant.components.light import DOMAIN as LIGHT_DOMAIN
 from homeassistant.core import Context, HomeAssistant
 from homeassistant.exceptions import HomeAssistantError
-from homeassistant.helpers import intent
+from homeassistant.helpers import chat_session, intent
 from homeassistant.setup import async_setup_component

 from . import MockAgent
@@ -288,18 +289,22 @@ async def test_async_handle_sentence_triggers(

    # Device id will be available in response template
    device_id = "1234"
-    actual_response = await async_handle_sentence_triggers(
-        hass,
-        ConversationInput(
-            text="my trigger",
-            context=Context(),
-            conversation_id=None,
-            agent_id=conversation.HOME_ASSISTANT_AGENT,
-            device_id=device_id,
-            satellite_id=None,
-            language=hass.config.language,
-        ),
+    user_input = ConversationInput(
+        text="my trigger",
+        context=Context(),
+        conversation_id=None,
+        agent_id=conversation.HOME_ASSISTANT_AGENT,
+        device_id=device_id,
+        satellite_id=None,
+        language=hass.config.language,
    )
+    with (
+        chat_session.async_get_chat_session(hass) as session,
+        async_get_chat_log(hass, session, user_input) as chat_log,
+    ):
+        actual_response = await async_handle_sentence_triggers(
+            hass, user_input, chat_log
+        )
    assert actual_response == expected_response


@@ -326,34 +331,38 @@ async def test_async_handle_intents(hass: HomeAssistant) -> None:
    intent.async_register(hass, handler)

    # Registered intent will be handled
-    result = await async_handle_intents(
-        hass,
-        ConversationInput(
-            text="I'd like to order a stout",
-            context=Context(),
-            agent_id=conversation.HOME_ASSISTANT_AGENT,
-            conversation_id=None,
-            device_id=None,
-            satellite_id=None,
-            language=hass.config.language,
-        ),
+    user_input = ConversationInput(
+        text="I'd like to order a stout",
+        context=Context(),
+        agent_id=conversation.HOME_ASSISTANT_AGENT,
+        conversation_id=None,
+        device_id=None,
+        satellite_id=None,
+        language=hass.config.language,
    )
+    with (
+        chat_session.async_get_chat_session(hass) as session,
+        async_get_chat_log(hass, session, user_input) as chat_log,
+    ):
+        result = await async_handle_intents(hass, user_input, chat_log)
    assert result is not None
    assert result.intent is not None
    assert result.intent.intent_type == handler.intent_type
    assert handler.was_handled

    # No error messages, just None as a result
-    result = await async_handle_intents(
-        hass,
-        ConversationInput(
-            text="this sentence does not exist",
-            agent_id=conversation.HOME_ASSISTANT_AGENT,
-            context=Context(),
-            conversation_id=None,
-            device_id=None,
-            satellite_id=None,
-            language=hass.config.language,
-        ),
+    user_input2 = ConversationInput(
+        text="this sentence does not exist",
+        agent_id=conversation.HOME_ASSISTANT_AGENT,
+        context=Context(),
+        conversation_id=None,
+        device_id=None,
+        satellite_id=None,
+        language=hass.config.language,
    )
+    with (
+        chat_session.async_get_chat_session(hass) as session,
+        async_get_chat_log(hass, session, user_input2) as chat_log,
+    ):
+        result = await async_handle_intents(hass, user_input2, chat_log)
    assert result is None