diff --git a/homeassistant/components/recorder/auto_repairs/schema.py b/homeassistant/components/recorder/auto_repairs/schema.py
index 3952f76bddd..2a09324dfe1 100644
--- a/homeassistant/components/recorder/auto_repairs/schema.py
+++ b/homeassistant/components/recorder/auto_repairs/schema.py
@@ -12,7 +12,7 @@ from sqlalchemy.orm import DeclarativeBase
 from sqlalchemy.orm.attributes import InstrumentedAttribute
 
 from ..const import SupportedDialect
-from ..db_schema import DOUBLE_PRECISION_TYPE_SQL, DOUBLE_TYPE
+from ..db_schema import DOUBLE_PRECISION_TYPE_SQL, DOUBLE_TYPE, MYSQL_COLLATE
 from ..util import session_scope
 
 if TYPE_CHECKING:
@@ -105,12 +105,13 @@ def _validate_table_schema_has_correct_collation(
             or dialect_kwargs.get("mariadb_collate")
             or connection.dialect._fetch_setting(connection, "collation_server")  # type: ignore[attr-defined]  # noqa: SLF001
         )
-        if collate and collate != "utf8mb4_unicode_ci":
+        if collate and collate != MYSQL_COLLATE:
             _LOGGER.debug(
-                "Database %s collation is not utf8mb4_unicode_ci",
+                "Database %s collation is not %s",
                 table,
+                MYSQL_COLLATE,
             )
-            schema_errors.add(f"{table}.utf8mb4_unicode_ci")
+            schema_errors.add(f"{table}.{MYSQL_COLLATE}")
 
     return schema_errors
 
@@ -240,7 +241,7 @@ def correct_db_schema_utf8(
     table_name = table_object.__tablename__
     if (
         f"{table_name}.4-byte UTF-8" in schema_errors
-        or f"{table_name}.utf8mb4_unicode_ci" in schema_errors
+        or f"{table_name}.{MYSQL_COLLATE}" in schema_errors
     ):
         from ..migration import (  # noqa: PLC0415
             _correct_table_character_set_and_collation,
diff --git a/homeassistant/components/recorder/db_schema.py b/homeassistant/components/recorder/db_schema.py
index 836924fc860..65de7e853a3 100644
--- a/homeassistant/components/recorder/db_schema.py
+++ b/homeassistant/components/recorder/db_schema.py
@@ -71,7 +71,7 @@ class LegacyBase(DeclarativeBase):
     """Base class for tables, used for schema migration."""
 
 
-SCHEMA_VERSION = 52
+SCHEMA_VERSION = 53
 
 _LOGGER = logging.getLogger(__name__)
 
@@ -128,7 +128,7 @@ LEGACY_STATES_ENTITY_ID_LAST_UPDATED_TS_INDEX = "ix_states_entity_id_last_update
 LEGACY_MAX_LENGTH_EVENT_CONTEXT_ID: Final = 36
 CONTEXT_ID_BIN_MAX_LENGTH = 16
 
-MYSQL_COLLATE = "utf8mb4_unicode_ci"
+MYSQL_COLLATE = "utf8mb4_bin"
 MYSQL_DEFAULT_CHARSET = "utf8mb4"
 MYSQL_ENGINE = "InnoDB"
 
diff --git a/homeassistant/components/recorder/migration.py b/homeassistant/components/recorder/migration.py
index 47d7ae58c3b..c8548dc2a97 100644
--- a/homeassistant/components/recorder/migration.py
+++ b/homeassistant/components/recorder/migration.py
@@ -1361,7 +1361,7 @@ class _SchemaVersion20Migrator(_SchemaVersionMigrator, target_version=20):
 class _SchemaVersion21Migrator(_SchemaVersionMigrator, target_version=21):
     def _apply_update(self) -> None:
         """Version specific update method."""
-        # Try to change the character set of the statistic_meta table
+        # Try to change the character set of events, states and statistics_meta tables
         if self.engine.dialect.name == SupportedDialect.MYSQL:
             for table in ("events", "states", "statistics_meta"):
                 _correct_table_character_set_and_collation(table, self.session_maker)
@@ -2125,6 +2125,23 @@ class _SchemaVersion52Migrator(_SchemaVersionMigrator, target_version=52):
                 )
 
 
+class _SchemaVersion53Migrator(_SchemaVersionMigrator, target_version=53):
+    def _apply_update(self) -> None:
+        """Version specific update method."""
+        # Try to change the character set and collation of the tables listed below
+        if self.engine.dialect.name == SupportedDialect.MYSQL:
+            for table in (
+                "events",
+                "event_data",
+                "states",
+                "state_attributes",
+                "statistics",
+                "statistics_meta",
+                "statistics_short_term",
+            ):
+                _correct_table_character_set_and_collation(table, self.session_maker)
+
+
 def _migrate_statistics_columns_to_timestamp_removing_duplicates(
     hass: HomeAssistant,
     instance: Recorder,
@@ -2167,8 +2184,10 @@ def _correct_table_character_set_and_collation(
     """Correct issues detected by validate_db_schema."""
     # Attempt to convert the table to utf8mb4
     _LOGGER.warning(
-        "Updating character set and collation of table %s to utf8mb4. %s",
+        "Updating table %s to character set %s and collation %s. %s",
         table,
+        MYSQL_DEFAULT_CHARSET,
+        MYSQL_COLLATE,
         MIGRATION_NOTE_MINUTES,
     )
     with (
diff --git a/tests/components/recorder/auto_repairs/events/test_schema.py b/tests/components/recorder/auto_repairs/events/test_schema.py
index 91f5bd50298..277749ce18a 100644
--- a/tests/components/recorder/auto_repairs/events/test_schema.py
+++ b/tests/components/recorder/auto_repairs/events/test_schema.py
@@ -82,7 +82,7 @@ async def test_validate_db_schema_fix_utf8_issue_event_data(
         in caplog.text
     )
     assert (
-        "Updating character set and collation of table event_data to utf8mb4"
+        "Updating table event_data to character set utf8mb4 and collation utf8mb4_bin"
         in caplog.text
     )
 
@@ -103,7 +103,7 @@ async def test_validate_db_schema_fix_collation_issue(
     with (
         patch(
             "homeassistant.components.recorder.auto_repairs.schema._validate_table_schema_has_correct_collation",
-            return_value={"events.utf8mb4_unicode_ci"},
+            return_value={"events.utf8mb4_bin"},
         ),
     ):
         async with async_test_recorder(hass):
@@ -111,9 +111,10 @@ async def test_validate_db_schema_fix_collation_issue(
 
     assert "Schema validation failed" not in caplog.text
     assert (
-        "Database is about to correct DB schema errors: events.utf8mb4_unicode_ci"
+        "Database is about to correct DB schema errors: events.utf8mb4_bin"
         in caplog.text
     )
     assert (
-        "Updating character set and collation of table events to utf8mb4" in caplog.text
+        "Updating table events to character set utf8mb4 and collation utf8mb4_bin"
+        in caplog.text
     )
diff --git a/tests/components/recorder/auto_repairs/states/test_schema.py b/tests/components/recorder/auto_repairs/states/test_schema.py
index 982a6a732b6..8a831409a71 100644
--- a/tests/components/recorder/auto_repairs/states/test_schema.py
+++ b/tests/components/recorder/auto_repairs/states/test_schema.py
@@ -84,7 +84,8 @@ async def test_validate_db_schema_fix_utf8_issue_states(
         in caplog.text
     )
     assert (
-        "Updating character set and collation of table states to utf8mb4" in caplog.text
+        "Updating table states to character set utf8mb4 and collation utf8mb4_bin"
+        in caplog.text
     )
 
 
@@ -116,7 +117,7 @@ async def test_validate_db_schema_fix_utf8_issue_state_attributes(
         in caplog.text
     )
     assert (
-        "Updating character set and collation of table state_attributes to utf8mb4"
+        "Updating table state_attributes to character set utf8mb4 and collation utf8mb4_bin"
         in caplog.text
     )
 
@@ -137,7 +138,7 @@ async def test_validate_db_schema_fix_collation_issue(
     with (
         patch(
             "homeassistant.components.recorder.auto_repairs.schema._validate_table_schema_has_correct_collation",
-            return_value={"states.utf8mb4_unicode_ci"},
+            return_value={"states.utf8mb4_bin"},
         ),
     ):
         async with async_test_recorder(hass):
@@ -145,9 +146,10 @@ async def test_validate_db_schema_fix_collation_issue(
 
     assert "Schema validation failed" not in caplog.text
     assert (
-        "Database is about to correct DB schema errors: states.utf8mb4_unicode_ci"
+        "Database is about to correct DB schema errors: states.utf8mb4_bin"
         in caplog.text
     )
     assert (
-        "Updating character set and collation of table states to utf8mb4" in caplog.text
+        "Updating table states to character set utf8mb4 and collation utf8mb4_bin"
+        in caplog.text
     )
diff --git a/tests/components/recorder/auto_repairs/statistics/test_schema.py b/tests/components/recorder/auto_repairs/statistics/test_schema.py
index 99d6705e4a4..0cbb0648f34 100644
--- a/tests/components/recorder/auto_repairs/statistics/test_schema.py
+++ b/tests/components/recorder/auto_repairs/statistics/test_schema.py
@@ -46,7 +46,7 @@ async def test_validate_db_schema_fix_utf8_issue(
         in caplog.text
     )
     assert (
-        "Updating character set and collation of table statistics_meta to utf8mb4"
+        "Updating table statistics_meta to character set utf8mb4 and collation utf8mb4_bin"
         in caplog.text
     )
 
@@ -113,7 +113,7 @@ async def test_validate_db_schema_fix_collation_issue(
     with (
         patch(
             "homeassistant.components.recorder.auto_repairs.schema._validate_table_schema_has_correct_collation",
-            return_value={"statistics.utf8mb4_unicode_ci"},
+            return_value={"statistics.utf8mb4_bin"},
         ),
     ):
         async with async_test_recorder(hass):
@@ -121,10 +121,10 @@ async def test_validate_db_schema_fix_collation_issue(
 
     assert "Schema validation failed" not in caplog.text
     assert (
-        "Database is about to correct DB schema errors: statistics.utf8mb4_unicode_ci"
+        "Database is about to correct DB schema errors: statistics.utf8mb4_bin"
         in caplog.text
     )
     assert (
-        "Updating character set and collation of table statistics to utf8mb4"
+        "Updating table statistics to character set utf8mb4 and collation utf8mb4_bin"
         in caplog.text
     )
diff --git a/tests/components/recorder/auto_repairs/test_schema.py b/tests/components/recorder/auto_repairs/test_schema.py
index 55b03419767..24330db1b7e 100644
--- a/tests/components/recorder/auto_repairs/test_schema.py
+++ b/tests/components/recorder/auto_repairs/test_schema.py
@@ -103,10 +103,16 @@ async def test_validate_db_schema_fix_utf8_issue_with_broken_schema(
 
 @pytest.mark.skip_on_db_engine(["postgresql", "sqlite"])
 @pytest.mark.usefixtures("skip_by_db_engine")
+@pytest.mark.parametrize(
+    ("charset", "collation"),
+    [("utf8mb3", "utf8_general_ci"), ("utf8mb4", "utf8mb4_unicode_ci")],
+)
 async def test_validate_db_schema_fix_incorrect_collation(
     hass: HomeAssistant,
     recorder_mock: Recorder,
     caplog: pytest.LogCaptureFixture,
+    charset: str,
+    collation: str,
 ) -> None:
     """Test validating DB schema with MySQL when the collation is incorrect."""
     await async_wait_recording_done(hass)
@@ -116,7 +122,7 @@ async def test_validate_db_schema_fix_incorrect_collation(
     with session_scope(session=session_maker()) as session:
         session.execute(
             text(
-                "ALTER TABLE states CHARACTER SET utf8mb3 COLLATE utf8_general_ci, "
+                f"ALTER TABLE states CHARACTER SET {charset} COLLATE {collation}, "
                 "LOCK=EXCLUSIVE;"
             )
         )
@@ -125,7 +131,7 @@ async def test_validate_db_schema_fix_incorrect_collation(
     schema_errors = await recorder_mock.async_add_executor_job(
         validate_table_schema_has_correct_collation, recorder_mock, States
     )
-    assert schema_errors == {"states.utf8mb4_unicode_ci"}
+    assert schema_errors == {"states.utf8mb4_bin"}
 
     # Now repair the schema
     await recorder_mock.async_add_executor_job(