mirror of
https://github.com/home-assistant/core.git
synced 2025-12-24 21:06:19 +00:00
Change collation to utf8mb4_bin for MySQL and MariaDB databases (#156297)
Co-authored-by: J. Nick Koston <nick@koston.org>
This commit is contained in:
@@ -12,7 +12,7 @@ from sqlalchemy.orm import DeclarativeBase
|
||||
from sqlalchemy.orm.attributes import InstrumentedAttribute
|
||||
|
||||
from ..const import SupportedDialect
|
||||
from ..db_schema import DOUBLE_PRECISION_TYPE_SQL, DOUBLE_TYPE
|
||||
from ..db_schema import DOUBLE_PRECISION_TYPE_SQL, DOUBLE_TYPE, MYSQL_COLLATE
|
||||
from ..util import session_scope
|
||||
|
||||
if TYPE_CHECKING:
|
||||
@@ -105,12 +105,13 @@ def _validate_table_schema_has_correct_collation(
|
||||
or dialect_kwargs.get("mariadb_collate")
|
||||
or connection.dialect._fetch_setting(connection, "collation_server") # type: ignore[attr-defined] # noqa: SLF001
|
||||
)
|
||||
if collate and collate != "utf8mb4_unicode_ci":
|
||||
if collate and collate != MYSQL_COLLATE:
|
||||
_LOGGER.debug(
|
||||
"Database %s collation is not utf8mb4_unicode_ci",
|
||||
"Database %s collation is not %s",
|
||||
table,
|
||||
MYSQL_COLLATE,
|
||||
)
|
||||
schema_errors.add(f"{table}.utf8mb4_unicode_ci")
|
||||
schema_errors.add(f"{table}.{MYSQL_COLLATE}")
|
||||
return schema_errors
|
||||
|
||||
|
||||
@@ -240,7 +241,7 @@ def correct_db_schema_utf8(
|
||||
table_name = table_object.__tablename__
|
||||
if (
|
||||
f"{table_name}.4-byte UTF-8" in schema_errors
|
||||
or f"{table_name}.utf8mb4_unicode_ci" in schema_errors
|
||||
or f"{table_name}.{MYSQL_COLLATE}" in schema_errors
|
||||
):
|
||||
from ..migration import ( # noqa: PLC0415
|
||||
_correct_table_character_set_and_collation,
|
||||
|
||||
@@ -71,7 +71,7 @@ class LegacyBase(DeclarativeBase):
|
||||
"""Base class for tables, used for schema migration."""
|
||||
|
||||
|
||||
SCHEMA_VERSION = 52
|
||||
SCHEMA_VERSION = 53
|
||||
|
||||
_LOGGER = logging.getLogger(__name__)
|
||||
|
||||
@@ -128,7 +128,7 @@ LEGACY_STATES_ENTITY_ID_LAST_UPDATED_TS_INDEX = "ix_states_entity_id_last_update
|
||||
LEGACY_MAX_LENGTH_EVENT_CONTEXT_ID: Final = 36
|
||||
CONTEXT_ID_BIN_MAX_LENGTH = 16
|
||||
|
||||
MYSQL_COLLATE = "utf8mb4_unicode_ci"
|
||||
MYSQL_COLLATE = "utf8mb4_bin"
|
||||
MYSQL_DEFAULT_CHARSET = "utf8mb4"
|
||||
MYSQL_ENGINE = "InnoDB"
|
||||
|
||||
|
||||
@@ -1361,7 +1361,7 @@ class _SchemaVersion20Migrator(_SchemaVersionMigrator, target_version=20):
|
||||
class _SchemaVersion21Migrator(_SchemaVersionMigrator, target_version=21):
|
||||
def _apply_update(self) -> None:
|
||||
"""Version specific update method."""
|
||||
# Try to change the character set of the statistic_meta table
|
||||
# Try to change the character set of events, states and statistics_meta tables
|
||||
if self.engine.dialect.name == SupportedDialect.MYSQL:
|
||||
for table in ("events", "states", "statistics_meta"):
|
||||
_correct_table_character_set_and_collation(table, self.session_maker)
|
||||
@@ -2125,6 +2125,23 @@ class _SchemaVersion52Migrator(_SchemaVersionMigrator, target_version=52):
|
||||
)
|
||||
|
||||
|
||||
class _SchemaVersion53Migrator(_SchemaVersionMigrator, target_version=53):
|
||||
def _apply_update(self) -> None:
|
||||
"""Version specific update method."""
|
||||
# Try to change the character set and collation of every table listed below
|
||||
if self.engine.dialect.name == SupportedDialect.MYSQL:
|
||||
for table in (
|
||||
"events",
|
||||
"event_data",
|
||||
"states",
|
||||
"state_attributes",
|
||||
"statistics",
|
||||
"statistics_meta",
|
||||
"statistics_short_term",
|
||||
):
|
||||
_correct_table_character_set_and_collation(table, self.session_maker)
|
||||
|
||||
|
||||
def _migrate_statistics_columns_to_timestamp_removing_duplicates(
|
||||
hass: HomeAssistant,
|
||||
instance: Recorder,
|
||||
@@ -2167,8 +2184,10 @@ def _correct_table_character_set_and_collation(
|
||||
"""Correct issues detected by validate_db_schema."""
|
||||
# Attempt to convert the table to utf8mb4
|
||||
_LOGGER.warning(
|
||||
"Updating character set and collation of table %s to utf8mb4. %s",
|
||||
"Updating table %s to character set %s and collation %s. %s",
|
||||
table,
|
||||
MYSQL_DEFAULT_CHARSET,
|
||||
MYSQL_COLLATE,
|
||||
MIGRATION_NOTE_MINUTES,
|
||||
)
|
||||
with (
|
||||
|
||||
@@ -82,7 +82,7 @@ async def test_validate_db_schema_fix_utf8_issue_event_data(
|
||||
in caplog.text
|
||||
)
|
||||
assert (
|
||||
"Updating character set and collation of table event_data to utf8mb4"
|
||||
"Updating table event_data to character set utf8mb4 and collation utf8mb4_bin"
|
||||
in caplog.text
|
||||
)
|
||||
|
||||
@@ -103,7 +103,7 @@ async def test_validate_db_schema_fix_collation_issue(
|
||||
with (
|
||||
patch(
|
||||
"homeassistant.components.recorder.auto_repairs.schema._validate_table_schema_has_correct_collation",
|
||||
return_value={"events.utf8mb4_unicode_ci"},
|
||||
return_value={"events.utf8mb4_bin"},
|
||||
),
|
||||
):
|
||||
async with async_test_recorder(hass):
|
||||
@@ -111,9 +111,10 @@ async def test_validate_db_schema_fix_collation_issue(
|
||||
|
||||
assert "Schema validation failed" not in caplog.text
|
||||
assert (
|
||||
"Database is about to correct DB schema errors: events.utf8mb4_unicode_ci"
|
||||
"Database is about to correct DB schema errors: events.utf8mb4_bin"
|
||||
in caplog.text
|
||||
)
|
||||
assert (
|
||||
"Updating character set and collation of table events to utf8mb4" in caplog.text
|
||||
"Updating table events to character set utf8mb4 and collation utf8mb4_bin"
|
||||
in caplog.text
|
||||
)
|
||||
|
||||
@@ -84,7 +84,8 @@ async def test_validate_db_schema_fix_utf8_issue_states(
|
||||
in caplog.text
|
||||
)
|
||||
assert (
|
||||
"Updating character set and collation of table states to utf8mb4" in caplog.text
|
||||
"Updating table states to character set utf8mb4 and collation utf8mb4_bin"
|
||||
in caplog.text
|
||||
)
|
||||
|
||||
|
||||
@@ -116,7 +117,7 @@ async def test_validate_db_schema_fix_utf8_issue_state_attributes(
|
||||
in caplog.text
|
||||
)
|
||||
assert (
|
||||
"Updating character set and collation of table state_attributes to utf8mb4"
|
||||
"Updating table state_attributes to character set utf8mb4 and collation utf8mb4_bin"
|
||||
in caplog.text
|
||||
)
|
||||
|
||||
@@ -137,7 +138,7 @@ async def test_validate_db_schema_fix_collation_issue(
|
||||
with (
|
||||
patch(
|
||||
"homeassistant.components.recorder.auto_repairs.schema._validate_table_schema_has_correct_collation",
|
||||
return_value={"states.utf8mb4_unicode_ci"},
|
||||
return_value={"states.utf8mb4_bin"},
|
||||
),
|
||||
):
|
||||
async with async_test_recorder(hass):
|
||||
@@ -145,9 +146,10 @@ async def test_validate_db_schema_fix_collation_issue(
|
||||
|
||||
assert "Schema validation failed" not in caplog.text
|
||||
assert (
|
||||
"Database is about to correct DB schema errors: states.utf8mb4_unicode_ci"
|
||||
"Database is about to correct DB schema errors: states.utf8mb4_bin"
|
||||
in caplog.text
|
||||
)
|
||||
assert (
|
||||
"Updating character set and collation of table states to utf8mb4" in caplog.text
|
||||
"Updating table states to character set utf8mb4 and collation utf8mb4_bin"
|
||||
in caplog.text
|
||||
)
|
||||
|
||||
@@ -46,7 +46,7 @@ async def test_validate_db_schema_fix_utf8_issue(
|
||||
in caplog.text
|
||||
)
|
||||
assert (
|
||||
"Updating character set and collation of table statistics_meta to utf8mb4"
|
||||
"Updating table statistics_meta to character set utf8mb4 and collation utf8mb4_bin"
|
||||
in caplog.text
|
||||
)
|
||||
|
||||
@@ -113,7 +113,7 @@ async def test_validate_db_schema_fix_collation_issue(
|
||||
with (
|
||||
patch(
|
||||
"homeassistant.components.recorder.auto_repairs.schema._validate_table_schema_has_correct_collation",
|
||||
return_value={"statistics.utf8mb4_unicode_ci"},
|
||||
return_value={"statistics.utf8mb4_bin"},
|
||||
),
|
||||
):
|
||||
async with async_test_recorder(hass):
|
||||
@@ -121,10 +121,10 @@ async def test_validate_db_schema_fix_collation_issue(
|
||||
|
||||
assert "Schema validation failed" not in caplog.text
|
||||
assert (
|
||||
"Database is about to correct DB schema errors: statistics.utf8mb4_unicode_ci"
|
||||
"Database is about to correct DB schema errors: statistics.utf8mb4_bin"
|
||||
in caplog.text
|
||||
)
|
||||
assert (
|
||||
"Updating character set and collation of table statistics to utf8mb4"
|
||||
"Updating table statistics to character set utf8mb4 and collation utf8mb4_bin"
|
||||
in caplog.text
|
||||
)
|
||||
|
||||
@@ -103,10 +103,16 @@ async def test_validate_db_schema_fix_utf8_issue_with_broken_schema(
|
||||
|
||||
@pytest.mark.skip_on_db_engine(["postgresql", "sqlite"])
|
||||
@pytest.mark.usefixtures("skip_by_db_engine")
|
||||
@pytest.mark.parametrize(
|
||||
("charset", "collation"),
|
||||
[("utf8mb3", "utf8_general_ci"), ("utf8mb4", "utf8mb4_unicode_ci")],
|
||||
)
|
||||
async def test_validate_db_schema_fix_incorrect_collation(
|
||||
hass: HomeAssistant,
|
||||
recorder_mock: Recorder,
|
||||
caplog: pytest.LogCaptureFixture,
|
||||
charset: str,
|
||||
collation: str,
|
||||
) -> None:
|
||||
"""Test validating DB schema with MySQL when the collation is incorrect."""
|
||||
await async_wait_recording_done(hass)
|
||||
@@ -116,7 +122,7 @@ async def test_validate_db_schema_fix_incorrect_collation(
|
||||
with session_scope(session=session_maker()) as session:
|
||||
session.execute(
|
||||
text(
|
||||
"ALTER TABLE states CHARACTER SET utf8mb3 COLLATE utf8_general_ci, "
|
||||
f"ALTER TABLE states CHARACTER SET {charset} COLLATE {collation}, "
|
||||
"LOCK=EXCLUSIVE;"
|
||||
)
|
||||
)
|
||||
@@ -125,7 +131,7 @@ async def test_validate_db_schema_fix_incorrect_collation(
|
||||
schema_errors = await recorder_mock.async_add_executor_job(
|
||||
validate_table_schema_has_correct_collation, recorder_mock, States
|
||||
)
|
||||
assert schema_errors == {"states.utf8mb4_unicode_ci"}
|
||||
assert schema_errors == {"states.utf8mb4_bin"}
|
||||
|
||||
# Now repair the schema
|
||||
await recorder_mock.async_add_executor_job(
|
||||
|
||||
Reference in New Issue
Block a user