Update OpenAI suggested prompt to not include citations (#154292)

Co-authored-by: Paulus Schoutsen <paulus@home-assistant.io> Co-authored-by: Paulus Schoutsen <balloob@gmail.com>
2025-12-25 05:26:47 +00:00 · 2025-11-10 00:06:43 +03:00
parent 6ee71dae35
commit 8d50754056
7 changed files with 133 additions and 7 deletions
--- a/homeassistant/components/openai_conversation/config_flow.py
+++ b/homeassistant/components/openai_conversation/config_flow.py
@@ -55,6 +55,7 @@ from .const import (
    CONF_WEB_SEARCH_CITY,
    CONF_WEB_SEARCH_CONTEXT_SIZE,
    CONF_WEB_SEARCH_COUNTRY,
+    CONF_WEB_SEARCH_INLINE_CITATIONS,
    CONF_WEB_SEARCH_REGION,
    CONF_WEB_SEARCH_TIMEZONE,
    CONF_WEB_SEARCH_USER_LOCATION,
@@ -73,6 +74,7 @@ from .const import (
    RECOMMENDED_VERBOSITY,
    RECOMMENDED_WEB_SEARCH,
    RECOMMENDED_WEB_SEARCH_CONTEXT_SIZE,
+    RECOMMENDED_WEB_SEARCH_INLINE_CITATIONS,
    RECOMMENDED_WEB_SEARCH_USER_LOCATION,
    UNSUPPORTED_IMAGE_MODELS,
    UNSUPPORTED_MODELS,
@@ -396,6 +398,10 @@ class OpenAISubentryFlowHandler(ConfigSubentryFlow):
                        CONF_WEB_SEARCH_USER_LOCATION,
                        default=RECOMMENDED_WEB_SEARCH_USER_LOCATION,
                    ): bool,
+                    vol.Optional(
+                        CONF_WEB_SEARCH_INLINE_CITATIONS,
+                        default=RECOMMENDED_WEB_SEARCH_INLINE_CITATIONS,
+                    ): bool,
                }
            )
        elif CONF_WEB_SEARCH in options:
@@ -411,6 +417,7 @@ class OpenAISubentryFlowHandler(ConfigSubentryFlow):
                    CONF_WEB_SEARCH_REGION,
                    CONF_WEB_SEARCH_COUNTRY,
                    CONF_WEB_SEARCH_TIMEZONE,
+                    CONF_WEB_SEARCH_INLINE_CITATIONS,
                )
            }

--- a/homeassistant/components/openai_conversation/const.py
+++ b/homeassistant/components/openai_conversation/const.py
@@ -30,6 +30,7 @@ CONF_WEB_SEARCH_CITY = "city"
 CONF_WEB_SEARCH_REGION = "region"
 CONF_WEB_SEARCH_COUNTRY = "country"
 CONF_WEB_SEARCH_TIMEZONE = "timezone"
+CONF_WEB_SEARCH_INLINE_CITATIONS = "inline_citations"
 RECOMMENDED_CODE_INTERPRETER = False
 RECOMMENDED_CHAT_MODEL = "gpt-4o-mini"
 RECOMMENDED_IMAGE_MODEL = "gpt-image-1"
@@ -41,6 +42,7 @@ RECOMMENDED_VERBOSITY = "medium"
 RECOMMENDED_WEB_SEARCH = False
 RECOMMENDED_WEB_SEARCH_CONTEXT_SIZE = "medium"
 RECOMMENDED_WEB_SEARCH_USER_LOCATION = False
+RECOMMENDED_WEB_SEARCH_INLINE_CITATIONS = False

 UNSUPPORTED_MODELS: list[str] = [
    "o1-mini",
--- a/homeassistant/components/openai_conversation/entity.py
+++ b/homeassistant/components/openai_conversation/entity.py
@@ -7,6 +7,7 @@ from collections.abc import AsyncGenerator, Callable, Iterable
 import json
 from mimetypes import guess_file_type
 from pathlib import Path
+import re
 from typing import TYPE_CHECKING, Any, Literal, cast

 import openai
@@ -29,6 +30,7 @@ from openai.types.responses import (
    ResponseInputImageParam,
    ResponseInputMessageContentListParam,
    ResponseInputParam,
+    ResponseInputTextParam,
    ResponseOutputItemAddedEvent,
    ResponseOutputItemDoneEvent,
    ResponseOutputMessage,
@@ -77,6 +79,7 @@ from .const import (
    CONF_WEB_SEARCH_CITY,
    CONF_WEB_SEARCH_CONTEXT_SIZE,
    CONF_WEB_SEARCH_COUNTRY,
+    CONF_WEB_SEARCH_INLINE_CITATIONS,
    CONF_WEB_SEARCH_REGION,
    CONF_WEB_SEARCH_TIMEZONE,
    CONF_WEB_SEARCH_USER_LOCATION,
@@ -90,6 +93,7 @@ from .const import (
    RECOMMENDED_TOP_P,
    RECOMMENDED_VERBOSITY,
    RECOMMENDED_WEB_SEARCH_CONTEXT_SIZE,
+    RECOMMENDED_WEB_SEARCH_INLINE_CITATIONS,
 )

 if TYPE_CHECKING:
@@ -251,6 +255,7 @@ def _convert_content_to_param(
 async def _transform_stream(  # noqa: C901 - This is complex, but better to have it in one place
    chat_log: conversation.ChatLog,
    stream: AsyncStream[ResponseStreamEvent],
+    remove_citations: bool = False,
 ) -> AsyncGenerator[
    conversation.AssistantContentDeltaDict | conversation.ToolResultContentDeltaDict
 ]:
@@ -258,6 +263,13 @@ async def _transform_stream(  # noqa: C901 - This is complex, but better to have
    last_summary_index = None
    last_role: Literal["assistant", "tool_result"] | None = None

+    # Non-reasoning models ignore the prompt asking them to omit citations, so we
+    # strip citations manually here. This relies on a fixed pattern: each citation
+    # is a parenthesized Markdown link that arrives within a single delta event,
+    # except that its closing parenthesis may arrive in the following delta event.
+    remove_parentheses: bool = False
+    citation_regexp = re.compile(r"\(\[([^\]]+)\]\((https?:\/\/[^\)]+)\)")
+
    async for event in stream:
        LOGGER.debug("Received event: %s", event)

@@ -344,7 +356,23 @@ async def _transform_stream(  # noqa: C901 - This is complex, but better to have
                yield {"native": event.item}
                last_summary_index = -1  # Trigger new assistant message on next turn
        elif isinstance(event, ResponseTextDeltaEvent):
-            yield {"content": event.delta}
+            data = event.delta
+            if remove_parentheses:
+                data = data.removeprefix(")")
+                remove_parentheses = False
+            elif remove_citations and (match := citation_regexp.search(data)):
+                match_start, match_end = match.span()
+                # remove leading space if any
+                if data[match_start - 1 : match_start] == " ":
+                    match_start -= 1
+                # remove closing parenthesis:
+                if data[match_end : match_end + 1] == ")":
+                    match_end += 1
+                else:
+                    remove_parentheses = True
+                data = data[:match_start] + data[match_end:]
+            if data:
+                yield {"content": data}
        elif isinstance(event, ResponseReasoningSummaryTextDeltaEvent):
            # OpenAI can output several reasoning summaries
            # in a single ResponseReasoningItem. We split them as separate
@@ -489,6 +517,7 @@ class OpenAIBaseLLMEntity(Entity):
                for tool in chat_log.llm_api.tools
            ]

+        remove_citations = False
        if options.get(CONF_WEB_SEARCH):
            web_search = WebSearchToolParam(
                type="web_search",
@@ -504,6 +533,27 @@ class OpenAIBaseLLMEntity(Entity):
                    country=options.get(CONF_WEB_SEARCH_COUNTRY, ""),
                    timezone=options.get(CONF_WEB_SEARCH_TIMEZONE, ""),
                )
+            if not options.get(
+                CONF_WEB_SEARCH_INLINE_CITATIONS,
+                RECOMMENDED_WEB_SEARCH_INLINE_CITATIONS,
+            ):
+                system_message = cast(EasyInputMessageParam, messages[0])
+                content = system_message["content"]
+                if isinstance(content, str):
+                    system_message["content"] = [
+                        ResponseInputTextParam(type="input_text", text=content)
+                    ]
+                system_message["content"].append(  # type: ignore[union-attr]
+                    ResponseInputTextParam(
+                        type="input_text",
+                        text="When doing a web search, do not include source citations",
+                    )
+                )
+
+                if "reasoning" not in model_args:
+                    # Reasoning models handle this correctly with just a prompt
+                    remove_citations = True
+
            tools.append(web_search)

        if options.get(CONF_CODE_INTERPRETER):
@@ -573,7 +623,8 @@ class OpenAIBaseLLMEntity(Entity):
                        [
                            content
                            async for content in chat_log.async_add_delta_content_stream(
-                                self.entity_id, _transform_stream(chat_log, stream)
+                                self.entity_id,
+                                _transform_stream(chat_log, stream, remove_citations),
                            )
                        ]
                    )
--- a/homeassistant/components/openai_conversation/strings.json
+++ b/homeassistant/components/openai_conversation/strings.json
@@ -51,6 +51,7 @@
          "data": {
            "code_interpreter": "[%key:component::openai_conversation::config_subentries::conversation::step::model::data::code_interpreter%]",
            "image_model": "[%key:component::openai_conversation::config_subentries::conversation::step::model::data::image_model%]",
+            "inline_citations": "[%key:component::openai_conversation::config_subentries::conversation::step::model::data::inline_citations%]",
            "reasoning_effort": "[%key:component::openai_conversation::config_subentries::conversation::step::model::data::reasoning_effort%]",
            "search_context_size": "[%key:component::openai_conversation::config_subentries::conversation::step::model::data::search_context_size%]",
            "user_location": "[%key:component::openai_conversation::config_subentries::conversation::step::model::data::user_location%]",
@@ -59,6 +60,7 @@
          "data_description": {
            "code_interpreter": "[%key:component::openai_conversation::config_subentries::conversation::step::model::data_description::code_interpreter%]",
            "image_model": "[%key:component::openai_conversation::config_subentries::conversation::step::model::data_description::image_model%]",
+            "inline_citations": "[%key:component::openai_conversation::config_subentries::conversation::step::model::data_description::inline_citations%]",
            "reasoning_effort": "[%key:component::openai_conversation::config_subentries::conversation::step::model::data_description::reasoning_effort%]",
            "search_context_size": "[%key:component::openai_conversation::config_subentries::conversation::step::model::data_description::search_context_size%]",
            "user_location": "[%key:component::openai_conversation::config_subentries::conversation::step::model::data_description::user_location%]",
@@ -74,7 +76,6 @@
        "reconfigure_successful": "[%key:common::config_flow::abort::reconfigure_successful%]"
      },
      "entry_type": "Conversation agent",
-
      "error": {
        "model_not_supported": "This model is not supported, please select a different model",
        "web_search_minimal_reasoning": "Web search is currently not supported with minimal reasoning effort"
@@ -108,6 +109,7 @@
          "data": {
            "code_interpreter": "Enable code interpreter tool",
            "image_model": "Image generation model",
+            "inline_citations": "Include links in web search results",
            "reasoning_effort": "Reasoning effort",
            "search_context_size": "Search context size",
            "user_location": "Include home location",
@@ -116,6 +118,7 @@
          "data_description": {
            "code_interpreter": "This tool, also known as the python tool to the model, allows it to run code to answer questions",
            "image_model": "The model to use when generating images",
+            "inline_citations": "If disabled, an additional prompt is added asking the model not to include source citations",
            "reasoning_effort": "How many reasoning tokens the model should generate before creating a response to the prompt",
            "search_context_size": "High level guidance for the amount of context window space to use for the search",
            "user_location": "Refine search results based on geography",
--- a/tests/components/openai_conversation/snapshots/test_conversation.ambr
+++ b/tests/components/openai_conversation/snapshots/test_conversation.ambr
@@ -207,7 +207,7 @@
    }),
  ])
 # ---
-# name: test_web_search
+# name: test_web_search[False]
  list([
    dict({
      'content': "What's on the latest news?",
@@ -224,7 +224,40 @@
      'type': 'web_search_call',
    }),
    dict({
-      'content': 'Home Assistant now supports ChatGPT Search in Assist',
+      'content': 'Home Assistant now supports ChatGPT Search in Assist.',
+      'role': 'assistant',
+      'type': 'message',
+    }),
+    dict({
+      'content': 'Thank you!',
+      'role': 'user',
+      'type': 'message',
+    }),
+    dict({
+      'content': 'You are welcome!',
+      'role': 'assistant',
+      'type': 'message',
+    }),
+  ])
+# ---
+# name: test_web_search[True]
+  list([
+    dict({
+      'content': "What's on the latest news?",
+      'role': 'user',
+      'type': 'message',
+    }),
+    dict({
+      'action': dict({
+        'query': 'query',
+        'type': 'search',
+      }),
+      'id': 'ws_A',
+      'status': 'completed',
+      'type': 'web_search_call',
+    }),
+    dict({
+      'content': 'Home Assistant now supports ChatGPT Search in Assist ([release notes](https://www.home-assistant.io/blog/categories/release-notes/)).',
      'role': 'assistant',
      'type': 'message',
    }),
--- a/tests/components/openai_conversation/test_config_flow.py
+++ b/tests/components/openai_conversation/test_config_flow.py
@@ -26,6 +26,7 @@ from homeassistant.components.openai_conversation.const import (
    CONF_WEB_SEARCH_CITY,
    CONF_WEB_SEARCH_CONTEXT_SIZE,
    CONF_WEB_SEARCH_COUNTRY,
+    CONF_WEB_SEARCH_INLINE_CITATIONS,
    CONF_WEB_SEARCH_REGION,
    CONF_WEB_SEARCH_TIMEZONE,
    CONF_WEB_SEARCH_USER_LOCATION,
@@ -404,6 +405,7 @@ async def test_form_invalid_auth(hass: HomeAssistant, side_effect, error) -> Non
                    CONF_WEB_SEARCH: True,
                    CONF_WEB_SEARCH_CONTEXT_SIZE: "low",
                    CONF_WEB_SEARCH_USER_LOCATION: False,
+                    CONF_WEB_SEARCH_INLINE_CITATIONS: True,
                    CONF_CODE_INTERPRETER: False,
                },
            ),
@@ -417,6 +419,7 @@ async def test_form_invalid_auth(hass: HomeAssistant, side_effect, error) -> Non
                CONF_WEB_SEARCH: True,
                CONF_WEB_SEARCH_CONTEXT_SIZE: "low",
                CONF_WEB_SEARCH_USER_LOCATION: False,
+                CONF_WEB_SEARCH_INLINE_CITATIONS: True,
                CONF_CODE_INTERPRETER: False,
            },
        ),
@@ -436,6 +439,7 @@ async def test_form_invalid_auth(hass: HomeAssistant, side_effect, error) -> Non
                CONF_WEB_SEARCH_REGION: "California",
                CONF_WEB_SEARCH_COUNTRY: "US",
                CONF_WEB_SEARCH_TIMEZONE: "America/Los_Angeles",
+                CONF_WEB_SEARCH_INLINE_CITATIONS: True,
                CONF_CODE_INTERPRETER: True,
            },
            (
@@ -453,6 +457,7 @@ async def test_form_invalid_auth(hass: HomeAssistant, side_effect, error) -> Non
                    CONF_WEB_SEARCH: True,
                    CONF_WEB_SEARCH_CONTEXT_SIZE: "low",
                    CONF_WEB_SEARCH_USER_LOCATION: False,
+                    CONF_WEB_SEARCH_INLINE_CITATIONS: True,
                    CONF_CODE_INTERPRETER: True,
                },
            ),
@@ -466,6 +471,7 @@ async def test_form_invalid_auth(hass: HomeAssistant, side_effect, error) -> Non
                CONF_WEB_SEARCH: True,
                CONF_WEB_SEARCH_CONTEXT_SIZE: "low",
                CONF_WEB_SEARCH_USER_LOCATION: False,
+                CONF_WEB_SEARCH_INLINE_CITATIONS: True,
                CONF_CODE_INTERPRETER: True,
            },
        ),
@@ -483,6 +489,7 @@ async def test_form_invalid_auth(hass: HomeAssistant, side_effect, error) -> Non
                CONF_WEB_SEARCH: False,
                CONF_WEB_SEARCH_CONTEXT_SIZE: "low",
                CONF_WEB_SEARCH_USER_LOCATION: False,
+                CONF_WEB_SEARCH_INLINE_CITATIONS: True,
            },
            (
                {
@@ -502,6 +509,7 @@ async def test_form_invalid_auth(hass: HomeAssistant, side_effect, error) -> Non
                    CONF_WEB_SEARCH: False,
                    CONF_WEB_SEARCH_CONTEXT_SIZE: "low",
                    CONF_WEB_SEARCH_USER_LOCATION: False,
+                    CONF_WEB_SEARCH_INLINE_CITATIONS: True,
                },
            ),
            {
@@ -517,6 +525,7 @@ async def test_form_invalid_auth(hass: HomeAssistant, side_effect, error) -> Non
                CONF_WEB_SEARCH: False,
                CONF_WEB_SEARCH_CONTEXT_SIZE: "low",
                CONF_WEB_SEARCH_USER_LOCATION: False,
+                CONF_WEB_SEARCH_INLINE_CITATIONS: True,
            },
        ),
        # Test that old options are removed after reconfiguration
@@ -536,6 +545,7 @@ async def test_form_invalid_auth(hass: HomeAssistant, side_effect, error) -> Non
                CONF_WEB_SEARCH_REGION: "California",
                CONF_WEB_SEARCH_COUNTRY: "US",
                CONF_WEB_SEARCH_TIMEZONE: "America/Los_Angeles",
+                CONF_WEB_SEARCH_INLINE_CITATIONS: True,
            },
            (
                {
@@ -591,6 +601,7 @@ async def test_form_invalid_auth(hass: HomeAssistant, side_effect, error) -> Non
                CONF_WEB_SEARCH_REGION: "California",
                CONF_WEB_SEARCH_COUNTRY: "US",
                CONF_WEB_SEARCH_TIMEZONE: "America/Los_Angeles",
+                CONF_WEB_SEARCH_INLINE_CITATIONS: True,
                CONF_CODE_INTERPRETER: True,
            },
            (
@@ -648,6 +659,7 @@ async def test_form_invalid_auth(hass: HomeAssistant, side_effect, error) -> Non
                    CONF_WEB_SEARCH: True,
                    CONF_WEB_SEARCH_CONTEXT_SIZE: "high",
                    CONF_WEB_SEARCH_USER_LOCATION: False,
+                    CONF_WEB_SEARCH_INLINE_CITATIONS: True,
                    CONF_CODE_INTERPRETER: False,
                },
            ),
@@ -661,6 +673,7 @@ async def test_form_invalid_auth(hass: HomeAssistant, side_effect, error) -> Non
                CONF_WEB_SEARCH: True,
                CONF_WEB_SEARCH_CONTEXT_SIZE: "high",
                CONF_WEB_SEARCH_USER_LOCATION: False,
+                CONF_WEB_SEARCH_INLINE_CITATIONS: True,
                CONF_CODE_INTERPRETER: False,
            },
        ),
@@ -679,6 +692,7 @@ async def test_form_invalid_auth(hass: HomeAssistant, side_effect, error) -> Non
                CONF_WEB_SEARCH: True,
                CONF_WEB_SEARCH_CONTEXT_SIZE: "high",
                CONF_WEB_SEARCH_USER_LOCATION: False,
+                CONF_WEB_SEARCH_INLINE_CITATIONS: True,
            },
            (
                {
@@ -695,6 +709,7 @@ async def test_form_invalid_auth(hass: HomeAssistant, side_effect, error) -> Non
                    CONF_WEB_SEARCH: True,
                    CONF_WEB_SEARCH_CONTEXT_SIZE: "high",
                    CONF_WEB_SEARCH_USER_LOCATION: False,
+                    CONF_WEB_SEARCH_INLINE_CITATIONS: True,
                },
            ),
            {
@@ -708,6 +723,7 @@ async def test_form_invalid_auth(hass: HomeAssistant, side_effect, error) -> Non
                CONF_WEB_SEARCH: True,
                CONF_WEB_SEARCH_CONTEXT_SIZE: "high",
                CONF_WEB_SEARCH_USER_LOCATION: False,
+                CONF_WEB_SEARCH_INLINE_CITATIONS: True,
            },
        ),
    ],
@@ -858,6 +874,7 @@ async def test_subentry_web_search_user_location(
        CONF_WEB_SEARCH_REGION: "California",
        CONF_WEB_SEARCH_COUNTRY: "US",
        CONF_WEB_SEARCH_TIMEZONE: "America/Los_Angeles",
+        CONF_WEB_SEARCH_INLINE_CITATIONS: False,
        CONF_CODE_INTERPRETER: False,
    }

--- a/tests/components/openai_conversation/test_conversation.py
+++ b/tests/components/openai_conversation/test_conversation.py
@@ -21,6 +21,7 @@ from homeassistant.components.openai_conversation.const import (
    CONF_WEB_SEARCH_CITY,
    CONF_WEB_SEARCH_CONTEXT_SIZE,
    CONF_WEB_SEARCH_COUNTRY,
+    CONF_WEB_SEARCH_INLINE_CITATIONS,
    CONF_WEB_SEARCH_REGION,
    CONF_WEB_SEARCH_TIMEZONE,
    CONF_WEB_SEARCH_USER_LOCATION,
@@ -429,6 +430,7 @@ async def test_assist_api_tools_conversion(
    assert tools


+@pytest.mark.parametrize("inline_citations", [True, False])
 async def test_web_search(
    hass: HomeAssistant,
    mock_config_entry: MockConfigEntry,
@@ -436,6 +438,7 @@ async def test_web_search(
    mock_create_stream,
    mock_chat_log: MockChatLog,  # noqa: F811
    snapshot: SnapshotAssertion,
+    inline_citations: bool,
 ) -> None:
    """Test web_search_tool."""
    subentry = next(iter(mock_config_entry.subentries.values()))
@@ -451,11 +454,17 @@ async def test_web_search(
            CONF_WEB_SEARCH_COUNTRY: "US",
            CONF_WEB_SEARCH_REGION: "California",
            CONF_WEB_SEARCH_TIMEZONE: "America/Los_Angeles",
+            CONF_WEB_SEARCH_INLINE_CITATIONS: inline_citations,
        },
    )
    await hass.config_entries.async_reload(mock_config_entry.entry_id)

-    message = "Home Assistant now supports ChatGPT Search in Assist"
+    message = [
+        "Home Assistant now supports ",
+        "ChatGPT Search in Assist",
+        " ([release notes](https://www.home-assistant.io/blog/categories/release-notes/)",
+        ").",
+    ]
    mock_create_stream.return_value = [
        # Initial conversation
        (
@@ -486,7 +495,6 @@ async def test_web_search(
        }
    ]
    assert result.response.response_type == intent.IntentResponseType.ACTION_DONE
-    assert result.response.speech["plain"]["speech"] == message, result.response.speech

    # Test follow-up message in multi-turn conversation
    mock_create_stream.return_value = [
@@ -501,6 +509,11 @@ async def test_web_search(
        agent_id="conversation.openai_conversation",
    )

+    assert (
+        isinstance(mock_create_stream.mock_calls[0][2]["input"][0]["content"], list)
+        and "do not include source citations"
+        in mock_create_stream.mock_calls[0][2]["input"][0]["content"][1]["text"]
+    ) is not inline_citations
    assert mock_create_stream.mock_calls[1][2]["input"][1:] == snapshot