1
0
mirror of https://github.com/home-assistant/core.git synced 2026-02-15 07:36:16 +00:00

Prevent multiple Home Assistant instances from running with the same config directory (#151631)

This commit is contained in:
J. Nick Koston
2025-09-03 13:13:02 -05:00
committed by GitHub
parent 3385151c26
commit 9b80cf7d94
3 changed files with 394 additions and 26 deletions

View File

@@ -187,36 +187,42 @@ def main() -> int:
from . import config, runner # noqa: PLC0415
safe_mode = config.safe_mode_enabled(config_dir)
# Ensure only one instance runs per config directory
with runner.ensure_single_execution(config_dir) as single_execution_lock:
# Check if another instance is already running
if single_execution_lock.exit_code is not None:
return single_execution_lock.exit_code
runtime_conf = runner.RuntimeConfig(
config_dir=config_dir,
verbose=args.verbose,
log_rotate_days=args.log_rotate_days,
log_file=args.log_file,
log_no_color=args.log_no_color,
skip_pip=args.skip_pip,
skip_pip_packages=args.skip_pip_packages,
recovery_mode=args.recovery_mode,
debug=args.debug,
open_ui=args.open_ui,
safe_mode=safe_mode,
)
safe_mode = config.safe_mode_enabled(config_dir)
fault_file_name = os.path.join(config_dir, FAULT_LOG_FILENAME)
with open(fault_file_name, mode="a", encoding="utf8") as fault_file:
faulthandler.enable(fault_file)
exit_code = runner.run(runtime_conf)
faulthandler.disable()
runtime_conf = runner.RuntimeConfig(
config_dir=config_dir,
verbose=args.verbose,
log_rotate_days=args.log_rotate_days,
log_file=args.log_file,
log_no_color=args.log_no_color,
skip_pip=args.skip_pip,
skip_pip_packages=args.skip_pip_packages,
recovery_mode=args.recovery_mode,
debug=args.debug,
open_ui=args.open_ui,
safe_mode=safe_mode,
)
# It's possible for the fault file to disappear, so suppress obvious errors
with suppress(FileNotFoundError):
if os.path.getsize(fault_file_name) == 0:
os.remove(fault_file_name)
fault_file_name = os.path.join(config_dir, FAULT_LOG_FILENAME)
with open(fault_file_name, mode="a", encoding="utf8") as fault_file:
faulthandler.enable(fault_file)
exit_code = runner.run(runtime_conf)
faulthandler.disable()
check_threads()
# It's possible for the fault file to disappear, so suppress obvious errors
with suppress(FileNotFoundError):
if os.path.getsize(fault_file_name) == 0:
os.remove(fault_file_name)
return exit_code
check_threads()
return exit_code
if __name__ == "__main__":

View File

@@ -3,10 +3,20 @@
from __future__ import annotations
import asyncio
from collections.abc import Generator
from contextlib import contextmanager
import dataclasses
from datetime import datetime
import fcntl
from io import TextIOWrapper
import json
import logging
import os
from pathlib import Path
import subprocess
import sys
import threading
import time
from time import monotonic
import traceback
from typing import Any
@@ -14,6 +24,7 @@ from typing import Any
import packaging.tags
from . import bootstrap
from .const import __version__
from .core import callback
from .helpers.frame import warn_use
from .util.executor import InterruptibleThreadPoolExecutor
@@ -33,9 +44,113 @@ from .util.thread import deadlock_safe_shutdown
MAX_EXECUTOR_WORKERS = 64
TASK_CANCELATION_TIMEOUT = 5
# Lock file configuration
LOCK_FILE_NAME = ".ha_run.lock"
LOCK_FILE_VERSION = 1 # Increment if format changes
_LOGGER = logging.getLogger(__name__)
@dataclasses.dataclass
class SingleExecutionLock:
"""Context object for single execution lock."""
exit_code: int | None = None
def _write_lock_info(lock_file: TextIOWrapper) -> None:
    """Replace the lock file's contents with this process's details.

    Args:
        lock_file: The open lock file handle.
    """
    # Drop whatever a previous run left behind before writing fresh data.
    lock_file.seek(0)
    lock_file.truncate()
    lock_file.write(
        json.dumps(
            {
                "pid": os.getpid(),
                "version": LOCK_FILE_VERSION,
                "ha_version": __version__,
                "start_ts": time.time(),
            }
        )
    )
    # Flush so another process that fails to grab the lock can read
    # this information immediately.
    lock_file.flush()
def _report_existing_instance(lock_file_path: Path, config_dir: str) -> None:
"""Report that another instance is already running.
Attempts to read the lock file to provide details about the running instance.
"""
error_msg: list[str] = []
error_msg.append("Error: Another Home Assistant instance is already running!")
# Try to read information about the existing instance
try:
with open(lock_file_path, encoding="utf-8") as f:
if content := f.read().strip():
existing_info = json.loads(content)
start_dt = datetime.fromtimestamp(existing_info["start_ts"])
# Format with timezone abbreviation if available, otherwise add local time indicator
if tz_abbr := start_dt.strftime("%Z"):
start_time = start_dt.strftime(f"%Y-%m-%d %H:%M:%S {tz_abbr}")
else:
start_time = (
start_dt.strftime("%Y-%m-%d %H:%M:%S") + " (local time)"
)
error_msg.append(f" PID: {existing_info['pid']}")
error_msg.append(f" Version: {existing_info['ha_version']}")
error_msg.append(f" Started: {start_time}")
else:
error_msg.append(" Unable to read lock file details.")
except (json.JSONDecodeError, OSError) as ex:
error_msg.append(f" Unable to read lock file details: {ex}")
error_msg.append(f" Config directory: {config_dir}")
error_msg.append("")
error_msg.append("Please stop the existing instance before starting a new one.")
for line in error_msg:
print(line, file=sys.stderr) # noqa: T201
@contextmanager
def ensure_single_execution(config_dir: str) -> Generator[SingleExecutionLock]:
    """Ensure only one Home Assistant instance runs per config directory.

    An exclusive ``flock`` on a well-known file inside the configuration
    directory guards against two instances sharing (and corrupting) the
    same data.

    Yields a SingleExecutionLock whose ``exit_code`` attribute is set when
    another instance already holds the lock.
    """
    lock_path = Path(config_dir) / LOCK_FILE_NAME
    lock_ctx = SingleExecutionLock()
    # "a+" never truncates, so if the lock attempt fails the existing
    # instance's details remain readable for the error report.
    with open(lock_path, "a+", encoding="utf-8") as handle:
        try:
            # Non-blocking exclusive lock; raises immediately when another
            # process already holds it.
            fcntl.flock(handle.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
        except BlockingIOError:
            # Someone else owns the lock: report, flag failure, and bail out.
            _report_existing_instance(lock_path, config_dir)
            lock_ctx.exit_code = 1
            yield lock_ctx
            return
        # Lock acquired: record who we are for later diagnostics.
        _write_lock_info(handle)
        # The lock is released when the file handle closes on exit.
        # IMPORTANT: the file itself is intentionally never unlinked; doing
        # so would let concurrent processes lock different inodes.
        yield lock_ctx
@dataclasses.dataclass(slots=True)
class RuntimeConfig:
"""Class to hold the information for running Home Assistant."""

View File

@@ -2,15 +2,21 @@
import asyncio
from collections.abc import Iterator
import fcntl
import json
import os
from pathlib import Path
import subprocess
import threading
from unittest.mock import patch
import time
from unittest.mock import MagicMock, patch
import packaging.tags
import py
import pytest
from homeassistant import core, runner
from homeassistant.const import __version__
from homeassistant.core import HomeAssistant
from homeassistant.util import executor, thread
@@ -187,3 +193,244 @@ def test_enable_posix_spawn() -> None:
):
runner._enable_posix_spawn()
assert subprocess._USE_POSIX_SPAWN is False
def test_ensure_single_execution_success(tmp_path: Path) -> None:
    """Test successful single instance execution."""
    config_dir = str(tmp_path)
    lock_file_path = tmp_path / runner.LOCK_FILE_NAME
    with runner.ensure_single_execution(config_dir) as lock:
        assert lock.exit_code is None
        assert lock_file_path.exists()
        # The lock file records details about this very process.
        data = json.loads(lock_file_path.read_text(encoding="utf-8"))
        assert data["pid"] == os.getpid()
        assert data["version"] == runner.LOCK_FILE_VERSION
        assert data["ha_version"] == __version__
        assert "start_ts" in data
        assert isinstance(data["start_ts"], float)
    # The lock file is deliberately left behind after the context exits
    # (unlinking it would open a race between competing processes).
    assert lock_file_path.exists()
def test_ensure_single_execution_blocked(
    tmp_path: Path, capfd: pytest.CaptureFixture[str]
) -> None:
    """Test that second instance is blocked when lock exists."""
    config_dir = str(tmp_path)
    lock_file_path = tmp_path / runner.LOCK_FILE_NAME
    # Hold the flock ourselves to simulate another running instance.
    with open(lock_file_path, "w+", encoding="utf-8") as holder:
        fcntl.flock(holder.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
        holder.write(
            json.dumps(
                {
                    "pid": 12345,
                    "version": 1,
                    "ha_version": "2025.1.0",
                    # Pretend the other instance started an hour ago.
                    "start_ts": time.time() - 3600,
                }
            )
        )
        holder.flush()
        with runner.ensure_single_execution(config_dir) as lock:
            assert lock.exit_code == 1
    captured = capfd.readouterr()
    expected_fragments = (
        "Another Home Assistant instance is already running!",
        "PID: 12345",
        "Version: 2025.1.0",
        "Started: ",
        # Naive datetimes are rendered with a local-time marker.
        "(local time)",
        f"Config directory: {config_dir}",
    )
    for fragment in expected_fragments:
        assert fragment in captured.err
def test_ensure_single_execution_corrupt_lock_file(
    tmp_path: Path, capfd: pytest.CaptureFixture[str]
) -> None:
    """Test handling of corrupted lock file."""
    config_dir = str(tmp_path)
    lock_file_path = tmp_path / runner.LOCK_FILE_NAME
    with open(lock_file_path, "w+", encoding="utf-8") as holder:
        fcntl.flock(holder.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
        # Garbage that json.loads cannot parse.
        holder.write("not valid json{]")
        holder.flush()
        # The second "instance" must still be refused, corrupt file or not.
        with runner.ensure_single_execution(config_dir) as lock:
            assert lock.exit_code == 1
    # The report falls back to a generic message for the unreadable details.
    captured = capfd.readouterr()
    assert "Another Home Assistant instance is already running!" in captured.err
    assert "Unable to read lock file details:" in captured.err
    assert f"Config directory: {config_dir}" in captured.err
def test_ensure_single_execution_empty_lock_file(
    tmp_path: Path, capfd: pytest.CaptureFixture[str]
) -> None:
    """Test handling of empty lock file."""
    config_dir = str(tmp_path)
    lock_file_path = tmp_path / runner.LOCK_FILE_NAME
    with open(lock_file_path, "w+", encoding="utf-8") as holder:
        fcntl.flock(holder.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
        # Deliberately write nothing: the file stays empty.
        holder.flush()
        # Lock acquisition still fails gracefully on the empty file.
        with runner.ensure_single_execution(config_dir) as lock:
            assert lock.exit_code == 1
    captured = capfd.readouterr()
    assert "Another Home Assistant instance is already running!" in captured.err
    assert "Unable to read lock file details." in captured.err
def test_ensure_single_execution_with_timezone(
    tmp_path: Path, capfd: pytest.CaptureFixture[str]
) -> None:
    """Test handling of lock file with timezone info (edge case)."""
    config_dir = str(tmp_path)
    lock_file_path = tmp_path / runner.LOCK_FILE_NAME
    # Note: the production code never writes timezone-aware timestamps;
    # this exercises the fallback formatting for an older/foreign file.
    with open(lock_file_path, "w+", encoding="utf-8") as holder:
        fcntl.flock(holder.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
        holder.write(
            json.dumps(
                {
                    "pid": 54321,
                    "version": 1,
                    "ha_version": "2025.2.0",
                    # Started 2 hours ago.
                    "start_ts": time.time() - 7200,
                }
            )
        )
        holder.flush()
        with runner.ensure_single_execution(config_dir) as lock:
            assert lock.exit_code == 1
    captured = capfd.readouterr()
    expected_fragments = (
        "Another Home Assistant instance is already running!",
        "PID: 54321",
        "Version: 2025.2.0",
        "Started: ",
        # fromtimestamp yields a naive datetime, hence the local-time marker.
        "(local time)",
    )
    for fragment in expected_fragments:
        assert fragment in captured.err
def test_ensure_single_execution_with_tz_abbreviation(
    tmp_path: Path, capfd: pytest.CaptureFixture[str]
) -> None:
    """Test handling of lock file when timezone abbreviation is available."""
    config_dir = str(tmp_path)
    lock_file_path = tmp_path / runner.LOCK_FILE_NAME
    with open(lock_file_path, "w+", encoding="utf-8") as holder:
        fcntl.flock(holder.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
        holder.write(
            json.dumps(
                {
                    "pid": 98765,
                    "version": 1,
                    "ha_version": "2025.3.0",
                    # Started 30 minutes ago.
                    "start_ts": time.time() - 1800,
                }
            )
        )
        holder.flush()
        # strftime("%Z") is OS-specific (often empty for naive datetimes),
        # so force a timezone abbreviation through a mocked datetime.
        fixed_formats = {
            "%Z": "PST",
            "%Y-%m-%d %H:%M:%S": "2025-09-03 10:30:45",
        }
        mock_dt = MagicMock()
        mock_dt.strftime.side_effect = lambda fmt: fixed_formats.get(
            fmt, "2025-09-03 10:30:45 PST"
        )
        with patch("homeassistant.runner.datetime") as mock_datetime:
            mock_datetime.fromtimestamp.return_value = mock_dt
            with runner.ensure_single_execution(config_dir) as lock:
                assert lock.exit_code == 1
    captured = capfd.readouterr()
    assert "Another Home Assistant instance is already running!" in captured.err
    assert "PID: 98765" in captured.err
    assert "Version: 2025.3.0" in captured.err
    assert "Started: 2025-09-03 10:30:45 PST" in captured.err
    # No "(local time)" marker when a timezone abbreviation is present.
    assert "(local time)" not in captured.err
def test_ensure_single_execution_file_not_unlinked(tmp_path: Path) -> None:
    """Test that lock file is never unlinked to avoid race conditions."""
    config_dir = str(tmp_path)
    lock_file_path = tmp_path / runner.LOCK_FILE_NAME
    # First run creates the lock file; remember its inode.
    with runner.ensure_single_execution(config_dir) as lock:
        assert lock.exit_code is None
        assert lock_file_path.exists()
        original_inode = lock_file_path.stat().st_ino
    # Still present after the context exits, and still the same file.
    assert lock_file_path.exists()
    assert lock_file_path.stat().st_ino == original_inode
    # A second run locks the very same file rather than recreating it.
    with runner.ensure_single_execution(config_dir) as lock:
        assert lock.exit_code is None
        assert lock_file_path.exists()
        assert lock_file_path.stat().st_ino == original_inode
    # Same inode after the second run too.
    assert lock_file_path.exists()
    assert lock_file_path.stat().st_ino == original_inode
def test_ensure_single_execution_sequential_runs(tmp_path: Path) -> None:
    """Test that sequential runs work correctly after lock is released."""
    config_dir = str(tmp_path)
    lock_file_path = tmp_path / runner.LOCK_FILE_NAME
    with runner.ensure_single_execution(config_dir) as lock:
        assert lock.exit_code is None
        assert lock_file_path.exists()
        first_data = json.loads(lock_file_path.read_text(encoding="utf-8"))
    # The lock file survives the first run (never unlinked).
    assert lock_file_path.exists()
    # Tiny delay so the second run records a strictly newer timestamp.
    time.sleep(0.00001)
    with runner.ensure_single_execution(config_dir) as lock:
        assert lock.exit_code is None
        assert lock_file_path.exists()
        second_data = json.loads(lock_file_path.read_text(encoding="utf-8"))
        assert second_data["pid"] == os.getpid()
        assert second_data["start_ts"] > first_data["start_ts"]
    # The lock file survives the second run as well.
    assert lock_file_path.exists()