1
0
mirror of https://github.com/home-assistant/core.git synced 2025-12-24 21:06:19 +00:00

Change collation to utf8mb4_bin for MySQL and MariaDB databases (#156297)

Co-authored-by: J. Nick Koston <nick@koston.org>
This commit is contained in:
Erik Montnemery
2025-11-12 16:54:58 +01:00
committed by GitHub
parent f70aeafb5f
commit 0711d62085
7 changed files with 53 additions and 24 deletions

View File

@@ -12,7 +12,7 @@ from sqlalchemy.orm import DeclarativeBase
from sqlalchemy.orm.attributes import InstrumentedAttribute
from ..const import SupportedDialect
from ..db_schema import DOUBLE_PRECISION_TYPE_SQL, DOUBLE_TYPE
from ..db_schema import DOUBLE_PRECISION_TYPE_SQL, DOUBLE_TYPE, MYSQL_COLLATE
from ..util import session_scope
if TYPE_CHECKING:
@@ -105,12 +105,13 @@ def _validate_table_schema_has_correct_collation(
or dialect_kwargs.get("mariadb_collate")
or connection.dialect._fetch_setting(connection, "collation_server") # type: ignore[attr-defined] # noqa: SLF001
)
if collate and collate != "utf8mb4_unicode_ci":
if collate and collate != MYSQL_COLLATE:
_LOGGER.debug(
"Database %s collation is not utf8mb4_unicode_ci",
"Database %s collation is not %s",
table,
MYSQL_COLLATE,
)
schema_errors.add(f"{table}.utf8mb4_unicode_ci")
schema_errors.add(f"{table}.{MYSQL_COLLATE}")
return schema_errors
@@ -240,7 +241,7 @@ def correct_db_schema_utf8(
table_name = table_object.__tablename__
if (
f"{table_name}.4-byte UTF-8" in schema_errors
or f"{table_name}.utf8mb4_unicode_ci" in schema_errors
or f"{table_name}.{MYSQL_COLLATE}" in schema_errors
):
from ..migration import ( # noqa: PLC0415
_correct_table_character_set_and_collation,

View File

@@ -71,7 +71,7 @@ class LegacyBase(DeclarativeBase):
"""Base class for tables, used for schema migration."""
SCHEMA_VERSION = 52
SCHEMA_VERSION = 53
_LOGGER = logging.getLogger(__name__)
@@ -128,7 +128,7 @@ LEGACY_STATES_ENTITY_ID_LAST_UPDATED_TS_INDEX = "ix_states_entity_id_last_update
LEGACY_MAX_LENGTH_EVENT_CONTEXT_ID: Final = 36
CONTEXT_ID_BIN_MAX_LENGTH = 16
MYSQL_COLLATE = "utf8mb4_unicode_ci"
MYSQL_COLLATE = "utf8mb4_bin"
MYSQL_DEFAULT_CHARSET = "utf8mb4"
MYSQL_ENGINE = "InnoDB"

View File

@@ -1361,7 +1361,7 @@ class _SchemaVersion20Migrator(_SchemaVersionMigrator, target_version=20):
class _SchemaVersion21Migrator(_SchemaVersionMigrator, target_version=21):
def _apply_update(self) -> None:
"""Version specific update method."""
# Try to change the character set of the statistic_meta table
# Try to change the character set of events, states and statistics_meta tables
if self.engine.dialect.name == SupportedDialect.MYSQL:
for table in ("events", "states", "statistics_meta"):
_correct_table_character_set_and_collation(table, self.session_maker)
@@ -2125,6 +2125,23 @@ class _SchemaVersion52Migrator(_SchemaVersionMigrator, target_version=52):
)
class _SchemaVersion53Migrator(_SchemaVersionMigrator, target_version=53):
    def _apply_update(self) -> None:
        """Version specific update method."""
        # Try to change the character set and collation of every table listed
        # below, not only events, states and statistics_meta as the schema
        # version 21 migration does. Only runs when the engine dialect is MySQL.
        if self.engine.dialect.name == SupportedDialect.MYSQL:
            for table in (
                "events",
                "event_data",
                "states",
                "state_attributes",
                "statistics",
                "statistics_meta",
                "statistics_short_term",
            ):
                _correct_table_character_set_and_collation(table, self.session_maker)
def _migrate_statistics_columns_to_timestamp_removing_duplicates(
hass: HomeAssistant,
instance: Recorder,
@@ -2167,8 +2184,10 @@ def _correct_table_character_set_and_collation(
"""Correct issues detected by validate_db_schema."""
# Attempt to convert the table to the utf8mb4 character set and the expected collation
_LOGGER.warning(
"Updating character set and collation of table %s to utf8mb4. %s",
"Updating table %s to character set %s and collation %s. %s",
table,
MYSQL_DEFAULT_CHARSET,
MYSQL_COLLATE,
MIGRATION_NOTE_MINUTES,
)
with (

View File

@@ -82,7 +82,7 @@ async def test_validate_db_schema_fix_utf8_issue_event_data(
in caplog.text
)
assert (
"Updating character set and collation of table event_data to utf8mb4"
"Updating table event_data to character set utf8mb4 and collation utf8mb4_bin"
in caplog.text
)
@@ -103,7 +103,7 @@ async def test_validate_db_schema_fix_collation_issue(
with (
patch(
"homeassistant.components.recorder.auto_repairs.schema._validate_table_schema_has_correct_collation",
return_value={"events.utf8mb4_unicode_ci"},
return_value={"events.utf8mb4_bin"},
),
):
async with async_test_recorder(hass):
@@ -111,9 +111,10 @@ async def test_validate_db_schema_fix_collation_issue(
assert "Schema validation failed" not in caplog.text
assert (
"Database is about to correct DB schema errors: events.utf8mb4_unicode_ci"
"Database is about to correct DB schema errors: events.utf8mb4_bin"
in caplog.text
)
assert (
"Updating character set and collation of table events to utf8mb4" in caplog.text
"Updating table events to character set utf8mb4 and collation utf8mb4_bin"
in caplog.text
)

View File

@@ -84,7 +84,8 @@ async def test_validate_db_schema_fix_utf8_issue_states(
in caplog.text
)
assert (
"Updating character set and collation of table states to utf8mb4" in caplog.text
"Updating table states to character set utf8mb4 and collation utf8mb4_bin"
in caplog.text
)
@@ -116,7 +117,7 @@ async def test_validate_db_schema_fix_utf8_issue_state_attributes(
in caplog.text
)
assert (
"Updating character set and collation of table state_attributes to utf8mb4"
"Updating table state_attributes to character set utf8mb4 and collation utf8mb4_bin"
in caplog.text
)
@@ -137,7 +138,7 @@ async def test_validate_db_schema_fix_collation_issue(
with (
patch(
"homeassistant.components.recorder.auto_repairs.schema._validate_table_schema_has_correct_collation",
return_value={"states.utf8mb4_unicode_ci"},
return_value={"states.utf8mb4_bin"},
),
):
async with async_test_recorder(hass):
@@ -145,9 +146,10 @@ async def test_validate_db_schema_fix_collation_issue(
assert "Schema validation failed" not in caplog.text
assert (
"Database is about to correct DB schema errors: states.utf8mb4_unicode_ci"
"Database is about to correct DB schema errors: states.utf8mb4_bin"
in caplog.text
)
assert (
"Updating character set and collation of table states to utf8mb4" in caplog.text
"Updating table states to character set utf8mb4 and collation utf8mb4_bin"
in caplog.text
)

View File

@@ -46,7 +46,7 @@ async def test_validate_db_schema_fix_utf8_issue(
in caplog.text
)
assert (
"Updating character set and collation of table statistics_meta to utf8mb4"
"Updating table statistics_meta to character set utf8mb4 and collation utf8mb4_bin"
in caplog.text
)
@@ -113,7 +113,7 @@ async def test_validate_db_schema_fix_collation_issue(
with (
patch(
"homeassistant.components.recorder.auto_repairs.schema._validate_table_schema_has_correct_collation",
return_value={"statistics.utf8mb4_unicode_ci"},
return_value={"statistics.utf8mb4_bin"},
),
):
async with async_test_recorder(hass):
@@ -121,10 +121,10 @@ async def test_validate_db_schema_fix_collation_issue(
assert "Schema validation failed" not in caplog.text
assert (
"Database is about to correct DB schema errors: statistics.utf8mb4_unicode_ci"
"Database is about to correct DB schema errors: statistics.utf8mb4_bin"
in caplog.text
)
assert (
"Updating character set and collation of table statistics to utf8mb4"
"Updating table statistics to character set utf8mb4 and collation utf8mb4_bin"
in caplog.text
)

View File

@@ -103,10 +103,16 @@ async def test_validate_db_schema_fix_utf8_issue_with_broken_schema(
@pytest.mark.skip_on_db_engine(["postgresql", "sqlite"])
@pytest.mark.usefixtures("skip_by_db_engine")
@pytest.mark.parametrize(
("charset", "collation"),
[("utf8mb3", "utf8_general_ci"), ("utf8mb4", "utf8mb4_unicode_ci")],
)
async def test_validate_db_schema_fix_incorrect_collation(
hass: HomeAssistant,
recorder_mock: Recorder,
caplog: pytest.LogCaptureFixture,
charset: str,
collation: str,
) -> None:
"""Test validating DB schema with MySQL when the collation is incorrect."""
await async_wait_recording_done(hass)
@@ -116,7 +122,7 @@ async def test_validate_db_schema_fix_incorrect_collation(
with session_scope(session=session_maker()) as session:
session.execute(
text(
"ALTER TABLE states CHARACTER SET utf8mb3 COLLATE utf8_general_ci, "
f"ALTER TABLE states CHARACTER SET {charset} COLLATE {collation}, "
"LOCK=EXCLUSIVE;"
)
)
@@ -125,7 +131,7 @@ async def test_validate_db_schema_fix_incorrect_collation(
schema_errors = await recorder_mock.async_add_executor_job(
validate_table_schema_has_correct_collation, recorder_mock, States
)
assert schema_errors == {"states.utf8mb4_unicode_ci"}
assert schema_errors == {"states.utf8mb4_bin"}
# Now repair the schema
await recorder_mock.async_add_executor_job(