1
0
mirror of https://github.com/home-assistant/core.git synced 2025-12-25 05:26:47 +00:00

Update OpenAI suggested prompt to not include citations (#154292)

Co-authored-by: Paulus Schoutsen <paulus@home-assistant.io>
Co-authored-by: Paulus Schoutsen <balloob@gmail.com>
This commit is contained in:
Denis Shulyaka
2025-11-10 00:06:43 +03:00
committed by GitHub
parent 6ee71dae35
commit 8d50754056
7 changed files with 133 additions and 7 deletions

View File

@@ -55,6 +55,7 @@ from .const import (
CONF_WEB_SEARCH_CITY,
CONF_WEB_SEARCH_CONTEXT_SIZE,
CONF_WEB_SEARCH_COUNTRY,
CONF_WEB_SEARCH_INLINE_CITATIONS,
CONF_WEB_SEARCH_REGION,
CONF_WEB_SEARCH_TIMEZONE,
CONF_WEB_SEARCH_USER_LOCATION,
@@ -73,6 +74,7 @@ from .const import (
RECOMMENDED_VERBOSITY,
RECOMMENDED_WEB_SEARCH,
RECOMMENDED_WEB_SEARCH_CONTEXT_SIZE,
RECOMMENDED_WEB_SEARCH_INLINE_CITATIONS,
RECOMMENDED_WEB_SEARCH_USER_LOCATION,
UNSUPPORTED_IMAGE_MODELS,
UNSUPPORTED_MODELS,
@@ -396,6 +398,10 @@ class OpenAISubentryFlowHandler(ConfigSubentryFlow):
CONF_WEB_SEARCH_USER_LOCATION,
default=RECOMMENDED_WEB_SEARCH_USER_LOCATION,
): bool,
vol.Optional(
CONF_WEB_SEARCH_INLINE_CITATIONS,
default=RECOMMENDED_WEB_SEARCH_INLINE_CITATIONS,
): bool,
}
)
elif CONF_WEB_SEARCH in options:
@@ -411,6 +417,7 @@ class OpenAISubentryFlowHandler(ConfigSubentryFlow):
CONF_WEB_SEARCH_REGION,
CONF_WEB_SEARCH_COUNTRY,
CONF_WEB_SEARCH_TIMEZONE,
CONF_WEB_SEARCH_INLINE_CITATIONS,
)
}

View File

@@ -30,6 +30,7 @@ CONF_WEB_SEARCH_CITY = "city"
CONF_WEB_SEARCH_REGION = "region"
CONF_WEB_SEARCH_COUNTRY = "country"
CONF_WEB_SEARCH_TIMEZONE = "timezone"
CONF_WEB_SEARCH_INLINE_CITATIONS = "inline_citations"
RECOMMENDED_CODE_INTERPRETER = False
RECOMMENDED_CHAT_MODEL = "gpt-4o-mini"
RECOMMENDED_IMAGE_MODEL = "gpt-image-1"
@@ -41,6 +42,7 @@ RECOMMENDED_VERBOSITY = "medium"
RECOMMENDED_WEB_SEARCH = False
RECOMMENDED_WEB_SEARCH_CONTEXT_SIZE = "medium"
RECOMMENDED_WEB_SEARCH_USER_LOCATION = False
RECOMMENDED_WEB_SEARCH_INLINE_CITATIONS = False
UNSUPPORTED_MODELS: list[str] = [
"o1-mini",

View File

@@ -7,6 +7,7 @@ from collections.abc import AsyncGenerator, Callable, Iterable
import json
from mimetypes import guess_file_type
from pathlib import Path
import re
from typing import TYPE_CHECKING, Any, Literal, cast
import openai
@@ -29,6 +30,7 @@ from openai.types.responses import (
ResponseInputImageParam,
ResponseInputMessageContentListParam,
ResponseInputParam,
ResponseInputTextParam,
ResponseOutputItemAddedEvent,
ResponseOutputItemDoneEvent,
ResponseOutputMessage,
@@ -77,6 +79,7 @@ from .const import (
CONF_WEB_SEARCH_CITY,
CONF_WEB_SEARCH_CONTEXT_SIZE,
CONF_WEB_SEARCH_COUNTRY,
CONF_WEB_SEARCH_INLINE_CITATIONS,
CONF_WEB_SEARCH_REGION,
CONF_WEB_SEARCH_TIMEZONE,
CONF_WEB_SEARCH_USER_LOCATION,
@@ -90,6 +93,7 @@ from .const import (
RECOMMENDED_TOP_P,
RECOMMENDED_VERBOSITY,
RECOMMENDED_WEB_SEARCH_CONTEXT_SIZE,
RECOMMENDED_WEB_SEARCH_INLINE_CITATIONS,
)
if TYPE_CHECKING:
@@ -251,6 +255,7 @@ def _convert_content_to_param(
async def _transform_stream( # noqa: C901 - This is complex, but better to have it in one place
chat_log: conversation.ChatLog,
stream: AsyncStream[ResponseStreamEvent],
remove_citations: bool = False,
) -> AsyncGenerator[
conversation.AssistantContentDeltaDict | conversation.ToolResultContentDeltaDict
]:
@@ -258,6 +263,13 @@ async def _transform_stream( # noqa: C901 - This is complex, but better to have
last_summary_index = None
last_role: Literal["assistant", "tool_result"] | None = None
# Non-reasoning models ignore the prompt asking them to omit citations, so we strip
# the citations manually here. They follow a fixed pattern: a Markdown link wrapped
# in parentheses, with the opening parenthesis and the link arriving in a single
# delta event. The closing parenthesis is usually in that same event, but sometimes
# it arrives at the start of the next delta event, which remove_parentheses tracks.
remove_parentheses: bool = False
citation_regexp = re.compile(r"\(\[([^\]]+)\]\((https?:\/\/[^\)]+)\)")
async for event in stream:
LOGGER.debug("Received event: %s", event)
@@ -344,7 +356,23 @@ async def _transform_stream( # noqa: C901 - This is complex, but better to have
yield {"native": event.item}
last_summary_index = -1 # Trigger new assistant message on next turn
elif isinstance(event, ResponseTextDeltaEvent):
yield {"content": event.delta}
data = event.delta
if remove_parentheses:
data = data.removeprefix(")")
remove_parentheses = False
elif remove_citations and (match := citation_regexp.search(data)):
match_start, match_end = match.span()
# remove leading space if any
if data[match_start - 1 : match_start] == " ":
match_start -= 1
# remove closing parenthesis:
if data[match_end : match_end + 1] == ")":
match_end += 1
else:
remove_parentheses = True
data = data[:match_start] + data[match_end:]
if data:
yield {"content": data}
elif isinstance(event, ResponseReasoningSummaryTextDeltaEvent):
# OpenAI can output several reasoning summaries
# in a single ResponseReasoningItem. We split them as separate
@@ -489,6 +517,7 @@ class OpenAIBaseLLMEntity(Entity):
for tool in chat_log.llm_api.tools
]
remove_citations = False
if options.get(CONF_WEB_SEARCH):
web_search = WebSearchToolParam(
type="web_search",
@@ -504,6 +533,27 @@ class OpenAIBaseLLMEntity(Entity):
country=options.get(CONF_WEB_SEARCH_COUNTRY, ""),
timezone=options.get(CONF_WEB_SEARCH_TIMEZONE, ""),
)
if not options.get(
CONF_WEB_SEARCH_INLINE_CITATIONS,
RECOMMENDED_WEB_SEARCH_INLINE_CITATIONS,
):
system_message = cast(EasyInputMessageParam, messages[0])
content = system_message["content"]
if isinstance(content, str):
system_message["content"] = [
ResponseInputTextParam(type="input_text", text=content)
]
system_message["content"].append( # type: ignore[union-attr]
ResponseInputTextParam(
type="input_text",
text="When doing a web search, do not include source citations",
)
)
if "reasoning" not in model_args:
# Reasoning models handle this correctly with just a prompt
remove_citations = True
tools.append(web_search)
if options.get(CONF_CODE_INTERPRETER):
@@ -573,7 +623,8 @@ class OpenAIBaseLLMEntity(Entity):
[
content
async for content in chat_log.async_add_delta_content_stream(
self.entity_id, _transform_stream(chat_log, stream)
self.entity_id,
_transform_stream(chat_log, stream, remove_citations),
)
]
)

View File

@@ -51,6 +51,7 @@
"data": {
"code_interpreter": "[%key:component::openai_conversation::config_subentries::conversation::step::model::data::code_interpreter%]",
"image_model": "[%key:component::openai_conversation::config_subentries::conversation::step::model::data::image_model%]",
"inline_citations": "[%key:component::openai_conversation::config_subentries::conversation::step::model::data::inline_citations%]",
"reasoning_effort": "[%key:component::openai_conversation::config_subentries::conversation::step::model::data::reasoning_effort%]",
"search_context_size": "[%key:component::openai_conversation::config_subentries::conversation::step::model::data::search_context_size%]",
"user_location": "[%key:component::openai_conversation::config_subentries::conversation::step::model::data::user_location%]",
@@ -59,6 +60,7 @@
"data_description": {
"code_interpreter": "[%key:component::openai_conversation::config_subentries::conversation::step::model::data_description::code_interpreter%]",
"image_model": "[%key:component::openai_conversation::config_subentries::conversation::step::model::data_description::image_model%]",
"inline_citations": "[%key:component::openai_conversation::config_subentries::conversation::step::model::data_description::inline_citations%]",
"reasoning_effort": "[%key:component::openai_conversation::config_subentries::conversation::step::model::data_description::reasoning_effort%]",
"search_context_size": "[%key:component::openai_conversation::config_subentries::conversation::step::model::data_description::search_context_size%]",
"user_location": "[%key:component::openai_conversation::config_subentries::conversation::step::model::data_description::user_location%]",
@@ -74,7 +76,6 @@
"reconfigure_successful": "[%key:common::config_flow::abort::reconfigure_successful%]"
},
"entry_type": "Conversation agent",
"error": {
"model_not_supported": "This model is not supported, please select a different model",
"web_search_minimal_reasoning": "Web search is currently not supported with minimal reasoning effort"
@@ -108,6 +109,7 @@
"data": {
"code_interpreter": "Enable code interpreter tool",
"image_model": "Image generation model",
"inline_citations": "Include links in web search results",
"reasoning_effort": "Reasoning effort",
"search_context_size": "Search context size",
"user_location": "Include home location",
@@ -116,6 +118,7 @@
"data_description": {
"code_interpreter": "This tool, also known as the python tool to the model, allows it to run code to answer questions",
"image_model": "The model to use when generating images",
"inline_citations": "If disabled, additional prompt is added to ask the model to not include source citations",
"reasoning_effort": "How many reasoning tokens the model should generate before creating a response to the prompt",
"search_context_size": "High level guidance for the amount of context window space to use for the search",
"user_location": "Refine search results based on geography",

View File

@@ -207,7 +207,7 @@
}),
])
# ---
# name: test_web_search
# name: test_web_search[False]
list([
dict({
'content': "What's on the latest news?",
@@ -224,7 +224,40 @@
'type': 'web_search_call',
}),
dict({
'content': 'Home Assistant now supports ChatGPT Search in Assist',
'content': 'Home Assistant now supports ChatGPT Search in Assist.',
'role': 'assistant',
'type': 'message',
}),
dict({
'content': 'Thank you!',
'role': 'user',
'type': 'message',
}),
dict({
'content': 'You are welcome!',
'role': 'assistant',
'type': 'message',
}),
])
# ---
# name: test_web_search[True]
list([
dict({
'content': "What's on the latest news?",
'role': 'user',
'type': 'message',
}),
dict({
'action': dict({
'query': 'query',
'type': 'search',
}),
'id': 'ws_A',
'status': 'completed',
'type': 'web_search_call',
}),
dict({
'content': 'Home Assistant now supports ChatGPT Search in Assist ([release notes](https://www.home-assistant.io/blog/categories/release-notes/)).',
'role': 'assistant',
'type': 'message',
}),

View File

@@ -26,6 +26,7 @@ from homeassistant.components.openai_conversation.const import (
CONF_WEB_SEARCH_CITY,
CONF_WEB_SEARCH_CONTEXT_SIZE,
CONF_WEB_SEARCH_COUNTRY,
CONF_WEB_SEARCH_INLINE_CITATIONS,
CONF_WEB_SEARCH_REGION,
CONF_WEB_SEARCH_TIMEZONE,
CONF_WEB_SEARCH_USER_LOCATION,
@@ -404,6 +405,7 @@ async def test_form_invalid_auth(hass: HomeAssistant, side_effect, error) -> Non
CONF_WEB_SEARCH: True,
CONF_WEB_SEARCH_CONTEXT_SIZE: "low",
CONF_WEB_SEARCH_USER_LOCATION: False,
CONF_WEB_SEARCH_INLINE_CITATIONS: True,
CONF_CODE_INTERPRETER: False,
},
),
@@ -417,6 +419,7 @@ async def test_form_invalid_auth(hass: HomeAssistant, side_effect, error) -> Non
CONF_WEB_SEARCH: True,
CONF_WEB_SEARCH_CONTEXT_SIZE: "low",
CONF_WEB_SEARCH_USER_LOCATION: False,
CONF_WEB_SEARCH_INLINE_CITATIONS: True,
CONF_CODE_INTERPRETER: False,
},
),
@@ -436,6 +439,7 @@ async def test_form_invalid_auth(hass: HomeAssistant, side_effect, error) -> Non
CONF_WEB_SEARCH_REGION: "California",
CONF_WEB_SEARCH_COUNTRY: "US",
CONF_WEB_SEARCH_TIMEZONE: "America/Los_Angeles",
CONF_WEB_SEARCH_INLINE_CITATIONS: True,
CONF_CODE_INTERPRETER: True,
},
(
@@ -453,6 +457,7 @@ async def test_form_invalid_auth(hass: HomeAssistant, side_effect, error) -> Non
CONF_WEB_SEARCH: True,
CONF_WEB_SEARCH_CONTEXT_SIZE: "low",
CONF_WEB_SEARCH_USER_LOCATION: False,
CONF_WEB_SEARCH_INLINE_CITATIONS: True,
CONF_CODE_INTERPRETER: True,
},
),
@@ -466,6 +471,7 @@ async def test_form_invalid_auth(hass: HomeAssistant, side_effect, error) -> Non
CONF_WEB_SEARCH: True,
CONF_WEB_SEARCH_CONTEXT_SIZE: "low",
CONF_WEB_SEARCH_USER_LOCATION: False,
CONF_WEB_SEARCH_INLINE_CITATIONS: True,
CONF_CODE_INTERPRETER: True,
},
),
@@ -483,6 +489,7 @@ async def test_form_invalid_auth(hass: HomeAssistant, side_effect, error) -> Non
CONF_WEB_SEARCH: False,
CONF_WEB_SEARCH_CONTEXT_SIZE: "low",
CONF_WEB_SEARCH_USER_LOCATION: False,
CONF_WEB_SEARCH_INLINE_CITATIONS: True,
},
(
{
@@ -502,6 +509,7 @@ async def test_form_invalid_auth(hass: HomeAssistant, side_effect, error) -> Non
CONF_WEB_SEARCH: False,
CONF_WEB_SEARCH_CONTEXT_SIZE: "low",
CONF_WEB_SEARCH_USER_LOCATION: False,
CONF_WEB_SEARCH_INLINE_CITATIONS: True,
},
),
{
@@ -517,6 +525,7 @@ async def test_form_invalid_auth(hass: HomeAssistant, side_effect, error) -> Non
CONF_WEB_SEARCH: False,
CONF_WEB_SEARCH_CONTEXT_SIZE: "low",
CONF_WEB_SEARCH_USER_LOCATION: False,
CONF_WEB_SEARCH_INLINE_CITATIONS: True,
},
),
# Test that old options are removed after reconfiguration
@@ -536,6 +545,7 @@ async def test_form_invalid_auth(hass: HomeAssistant, side_effect, error) -> Non
CONF_WEB_SEARCH_REGION: "California",
CONF_WEB_SEARCH_COUNTRY: "US",
CONF_WEB_SEARCH_TIMEZONE: "America/Los_Angeles",
CONF_WEB_SEARCH_INLINE_CITATIONS: True,
},
(
{
@@ -591,6 +601,7 @@ async def test_form_invalid_auth(hass: HomeAssistant, side_effect, error) -> Non
CONF_WEB_SEARCH_REGION: "California",
CONF_WEB_SEARCH_COUNTRY: "US",
CONF_WEB_SEARCH_TIMEZONE: "America/Los_Angeles",
CONF_WEB_SEARCH_INLINE_CITATIONS: True,
CONF_CODE_INTERPRETER: True,
},
(
@@ -648,6 +659,7 @@ async def test_form_invalid_auth(hass: HomeAssistant, side_effect, error) -> Non
CONF_WEB_SEARCH: True,
CONF_WEB_SEARCH_CONTEXT_SIZE: "high",
CONF_WEB_SEARCH_USER_LOCATION: False,
CONF_WEB_SEARCH_INLINE_CITATIONS: True,
CONF_CODE_INTERPRETER: False,
},
),
@@ -661,6 +673,7 @@ async def test_form_invalid_auth(hass: HomeAssistant, side_effect, error) -> Non
CONF_WEB_SEARCH: True,
CONF_WEB_SEARCH_CONTEXT_SIZE: "high",
CONF_WEB_SEARCH_USER_LOCATION: False,
CONF_WEB_SEARCH_INLINE_CITATIONS: True,
CONF_CODE_INTERPRETER: False,
},
),
@@ -679,6 +692,7 @@ async def test_form_invalid_auth(hass: HomeAssistant, side_effect, error) -> Non
CONF_WEB_SEARCH: True,
CONF_WEB_SEARCH_CONTEXT_SIZE: "high",
CONF_WEB_SEARCH_USER_LOCATION: False,
CONF_WEB_SEARCH_INLINE_CITATIONS: True,
},
(
{
@@ -695,6 +709,7 @@ async def test_form_invalid_auth(hass: HomeAssistant, side_effect, error) -> Non
CONF_WEB_SEARCH: True,
CONF_WEB_SEARCH_CONTEXT_SIZE: "high",
CONF_WEB_SEARCH_USER_LOCATION: False,
CONF_WEB_SEARCH_INLINE_CITATIONS: True,
},
),
{
@@ -708,6 +723,7 @@ async def test_form_invalid_auth(hass: HomeAssistant, side_effect, error) -> Non
CONF_WEB_SEARCH: True,
CONF_WEB_SEARCH_CONTEXT_SIZE: "high",
CONF_WEB_SEARCH_USER_LOCATION: False,
CONF_WEB_SEARCH_INLINE_CITATIONS: True,
},
),
],
@@ -858,6 +874,7 @@ async def test_subentry_web_search_user_location(
CONF_WEB_SEARCH_REGION: "California",
CONF_WEB_SEARCH_COUNTRY: "US",
CONF_WEB_SEARCH_TIMEZONE: "America/Los_Angeles",
CONF_WEB_SEARCH_INLINE_CITATIONS: False,
CONF_CODE_INTERPRETER: False,
}

View File

@@ -21,6 +21,7 @@ from homeassistant.components.openai_conversation.const import (
CONF_WEB_SEARCH_CITY,
CONF_WEB_SEARCH_CONTEXT_SIZE,
CONF_WEB_SEARCH_COUNTRY,
CONF_WEB_SEARCH_INLINE_CITATIONS,
CONF_WEB_SEARCH_REGION,
CONF_WEB_SEARCH_TIMEZONE,
CONF_WEB_SEARCH_USER_LOCATION,
@@ -429,6 +430,7 @@ async def test_assist_api_tools_conversion(
assert tools
@pytest.mark.parametrize("inline_citations", [True, False])
async def test_web_search(
hass: HomeAssistant,
mock_config_entry: MockConfigEntry,
@@ -436,6 +438,7 @@ async def test_web_search(
mock_create_stream,
mock_chat_log: MockChatLog, # noqa: F811
snapshot: SnapshotAssertion,
inline_citations: bool,
) -> None:
"""Test web_search_tool."""
subentry = next(iter(mock_config_entry.subentries.values()))
@@ -451,11 +454,17 @@ async def test_web_search(
CONF_WEB_SEARCH_COUNTRY: "US",
CONF_WEB_SEARCH_REGION: "California",
CONF_WEB_SEARCH_TIMEZONE: "America/Los_Angeles",
CONF_WEB_SEARCH_INLINE_CITATIONS: inline_citations,
},
)
await hass.config_entries.async_reload(mock_config_entry.entry_id)
message = "Home Assistant now supports ChatGPT Search in Assist"
message = [
"Home Assistant now supports ",
"ChatGPT Search in Assist",
" ([release notes](https://www.home-assistant.io/blog/categories/release-notes/)",
").",
]
mock_create_stream.return_value = [
# Initial conversation
(
@@ -486,7 +495,6 @@ async def test_web_search(
}
]
assert result.response.response_type == intent.IntentResponseType.ACTION_DONE
assert result.response.speech["plain"]["speech"] == message, result.response.speech
# Test follow-up message in multi-turn conversation
mock_create_stream.return_value = [
@@ -501,6 +509,11 @@ async def test_web_search(
agent_id="conversation.openai_conversation",
)
assert (
isinstance(mock_create_stream.mock_calls[0][2]["input"][0]["content"], list)
and "do not include source citations"
in mock_create_stream.mock_calls[0][2]["input"][0]["content"][1]["text"]
) is not inline_citations
assert mock_create_stream.mock_calls[1][2]["input"][1:] == snapshot