1
0
mirror of https://github.com/home-assistant/core.git synced 2026-02-15 07:36:16 +00:00

Prevent multiple Home Assistant instances from running with the same config directory (#151631)

This commit is contained in:
J. Nick Koston
2025-09-03 13:13:02 -05:00
committed by GitHub
parent 3385151c26
commit 9b80cf7d94
3 changed files with 394 additions and 26 deletions

View File

@@ -187,36 +187,42 @@ def main() -> int:
from . import config, runner # noqa: PLC0415
safe_mode = config.safe_mode_enabled(config_dir)
# Ensure only one instance runs per config directory
with runner.ensure_single_execution(config_dir) as single_execution_lock:
# Check if another instance is already running
if single_execution_lock.exit_code is not None:
return single_execution_lock.exit_code
runtime_conf = runner.RuntimeConfig(
config_dir=config_dir,
verbose=args.verbose,
log_rotate_days=args.log_rotate_days,
log_file=args.log_file,
log_no_color=args.log_no_color,
skip_pip=args.skip_pip,
skip_pip_packages=args.skip_pip_packages,
recovery_mode=args.recovery_mode,
debug=args.debug,
open_ui=args.open_ui,
safe_mode=safe_mode,
)
safe_mode = config.safe_mode_enabled(config_dir)
fault_file_name = os.path.join(config_dir, FAULT_LOG_FILENAME)
with open(fault_file_name, mode="a", encoding="utf8") as fault_file:
faulthandler.enable(fault_file)
exit_code = runner.run(runtime_conf)
faulthandler.disable()
runtime_conf = runner.RuntimeConfig(
config_dir=config_dir,
verbose=args.verbose,
log_rotate_days=args.log_rotate_days,
log_file=args.log_file,
log_no_color=args.log_no_color,
skip_pip=args.skip_pip,
skip_pip_packages=args.skip_pip_packages,
recovery_mode=args.recovery_mode,
debug=args.debug,
open_ui=args.open_ui,
safe_mode=safe_mode,
)
# It's possible for the fault file to disappear, so suppress obvious errors
with suppress(FileNotFoundError):
if os.path.getsize(fault_file_name) == 0:
os.remove(fault_file_name)
fault_file_name = os.path.join(config_dir, FAULT_LOG_FILENAME)
with open(fault_file_name, mode="a", encoding="utf8") as fault_file:
faulthandler.enable(fault_file)
exit_code = runner.run(runtime_conf)
faulthandler.disable()
check_threads()
# It's possible for the fault file to disappear, so suppress obvious errors
with suppress(FileNotFoundError):
if os.path.getsize(fault_file_name) == 0:
os.remove(fault_file_name)
return exit_code
check_threads()
return exit_code
if __name__ == "__main__":

View File

@@ -3,10 +3,20 @@
from __future__ import annotations
import asyncio
from collections.abc import Generator
from contextlib import contextmanager
import dataclasses
from datetime import datetime
import fcntl
from io import TextIOWrapper
import json
import logging
import os
from pathlib import Path
import subprocess
import sys
import threading
import time
from time import monotonic
import traceback
from typing import Any
@@ -14,6 +24,7 @@ from typing import Any
import packaging.tags
from . import bootstrap
from .const import __version__
from .core import callback
from .helpers.frame import warn_use
from .util.executor import InterruptibleThreadPoolExecutor
@@ -33,9 +44,113 @@ from .util.thread import deadlock_safe_shutdown
MAX_EXECUTOR_WORKERS = 64
TASK_CANCELATION_TIMEOUT = 5
# Lock file configuration
LOCK_FILE_NAME = ".ha_run.lock"
LOCK_FILE_VERSION = 1 # Increment if format changes
_LOGGER = logging.getLogger(__name__)
@dataclasses.dataclass
class SingleExecutionLock:
"""Context object for single execution lock."""
exit_code: int | None = None
def _write_lock_info(lock_file: TextIOWrapper) -> None:
    """Replace the lock file's contents with this process's details.

    Args:
        lock_file: The open lock file handle.
    """
    # Drop whatever a previous run left behind before writing fresh data.
    lock_file.seek(0)
    lock_file.truncate()
    lock_file.write(
        json.dumps(
            {
                "pid": os.getpid(),
                "version": LOCK_FILE_VERSION,
                "ha_version": __version__,
                "start_ts": time.time(),
            }
        )
    )
    # Flush so another process that fails to grab the lock can read
    # this information immediately.
    lock_file.flush()
def _report_existing_instance(lock_file_path: Path, config_dir: str) -> None:
"""Report that another instance is already running.
Attempts to read the lock file to provide details about the running instance.
"""
error_msg: list[str] = []
error_msg.append("Error: Another Home Assistant instance is already running!")
# Try to read information about the existing instance
try:
with open(lock_file_path, encoding="utf-8") as f:
if content := f.read().strip():
existing_info = json.loads(content)
start_dt = datetime.fromtimestamp(existing_info["start_ts"])
# Format with timezone abbreviation if available, otherwise add local time indicator
if tz_abbr := start_dt.strftime("%Z"):
start_time = start_dt.strftime(f"%Y-%m-%d %H:%M:%S {tz_abbr}")
else:
start_time = (
start_dt.strftime("%Y-%m-%d %H:%M:%S") + " (local time)"
)
error_msg.append(f" PID: {existing_info['pid']}")
error_msg.append(f" Version: {existing_info['ha_version']}")
error_msg.append(f" Started: {start_time}")
else:
error_msg.append(" Unable to read lock file details.")
except (json.JSONDecodeError, OSError) as ex:
error_msg.append(f" Unable to read lock file details: {ex}")
error_msg.append(f" Config directory: {config_dir}")
error_msg.append("")
error_msg.append("Please stop the existing instance before starting a new one.")
for line in error_msg:
print(line, file=sys.stderr) # noqa: T201
@contextmanager
def ensure_single_execution(config_dir: str) -> Generator[SingleExecutionLock]:
    """Ensure only one Home Assistant instance runs per config directory.

    An exclusive ``flock`` on a well-known file inside the configuration
    directory guards against two instances sharing (and corrupting) the
    same data.

    Yields a SingleExecutionLock whose ``exit_code`` attribute is set when
    another instance already holds the lock.
    """
    lock_path = Path(config_dir) / LOCK_FILE_NAME
    lock_ctx = SingleExecutionLock()
    # "a+" never truncates, so if the lock attempt fails the existing
    # instance's details remain readable for the error report.
    with open(lock_path, "a+", encoding="utf-8") as handle:
        try:
            # Non-blocking exclusive lock; raises immediately when another
            # process already holds it.
            fcntl.flock(handle.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
        except BlockingIOError:
            # Someone else owns the lock: report, flag failure, and bail out.
            _report_existing_instance(lock_path, config_dir)
            lock_ctx.exit_code = 1
            yield lock_ctx
            return
        # Lock acquired: record who we are for later diagnostics.
        _write_lock_info(handle)
        # The lock is released when the file handle closes on exit.
        # IMPORTANT: the file itself is intentionally never unlinked; doing
        # so would let concurrent processes lock different inodes.
        yield lock_ctx
@dataclasses.dataclass(slots=True)
class RuntimeConfig:
"""Class to hold the information for running Home Assistant."""

View File

@@ -2,15 +2,21 @@
import asyncio
from collections.abc import Iterator
import fcntl
import json
import os
from pathlib import Path
import subprocess
import threading
from unittest.mock import patch
import time
from unittest.mock import MagicMock, patch
import packaging.tags
import py
import pytest
from homeassistant import core, runner
from homeassistant.const import __version__
from homeassistant.core import HomeAssistant
from homeassistant.util import executor, thread
@@ -187,3 +193,244 @@ def test_enable_posix_spawn() -> None:
):
runner._enable_posix_spawn()
assert subprocess._USE_POSIX_SPAWN is False
def test_ensure_single_execution_success(tmp_path: Path) -> None:
    """Test successful single instance execution."""
    config_dir = str(tmp_path)
    lock_file_path = tmp_path / runner.LOCK_FILE_NAME
    with runner.ensure_single_execution(config_dir) as lock:
        assert lock.exit_code is None
        assert lock_file_path.exists()
        # The lock file records details about this very process.
        data = json.loads(lock_file_path.read_text(encoding="utf-8"))
        assert data["pid"] == os.getpid()
        assert data["version"] == runner.LOCK_FILE_VERSION
        assert data["ha_version"] == __version__
        assert "start_ts" in data
        assert isinstance(data["start_ts"], float)
    # The lock file is deliberately left behind after the context exits
    # (unlinking it would open a race between competing processes).
    assert lock_file_path.exists()
def test_ensure_single_execution_blocked(
    tmp_path: Path, capfd: pytest.CaptureFixture[str]
) -> None:
    """Test that second instance is blocked when lock exists."""
    config_dir = str(tmp_path)
    lock_file_path = tmp_path / runner.LOCK_FILE_NAME
    # Hold the flock ourselves to simulate another running instance.
    with open(lock_file_path, "w+", encoding="utf-8") as holder:
        fcntl.flock(holder.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
        holder.write(
            json.dumps(
                {
                    "pid": 12345,
                    "version": 1,
                    "ha_version": "2025.1.0",
                    # Pretend the other instance started an hour ago.
                    "start_ts": time.time() - 3600,
                }
            )
        )
        holder.flush()
        with runner.ensure_single_execution(config_dir) as lock:
            assert lock.exit_code == 1
    captured = capfd.readouterr()
    expected_fragments = (
        "Another Home Assistant instance is already running!",
        "PID: 12345",
        "Version: 2025.1.0",
        "Started: ",
        # Naive datetimes are rendered with a local-time marker.
        "(local time)",
        f"Config directory: {config_dir}",
    )
    for fragment in expected_fragments:
        assert fragment in captured.err
def test_ensure_single_execution_corrupt_lock_file(
    tmp_path: Path, capfd: pytest.CaptureFixture[str]
) -> None:
    """Test handling of corrupted lock file."""
    config_dir = str(tmp_path)
    lock_file_path = tmp_path / runner.LOCK_FILE_NAME
    with open(lock_file_path, "w+", encoding="utf-8") as holder:
        fcntl.flock(holder.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
        # Garbage that json.loads cannot parse.
        holder.write("not valid json{]")
        holder.flush()
        # The second "instance" must still be refused, corrupt file or not.
        with runner.ensure_single_execution(config_dir) as lock:
            assert lock.exit_code == 1
    # The report falls back to a generic message for the unreadable details.
    captured = capfd.readouterr()
    assert "Another Home Assistant instance is already running!" in captured.err
    assert "Unable to read lock file details:" in captured.err
    assert f"Config directory: {config_dir}" in captured.err
def test_ensure_single_execution_empty_lock_file(
    tmp_path: Path, capfd: pytest.CaptureFixture[str]
) -> None:
    """Test handling of empty lock file."""
    config_dir = str(tmp_path)
    lock_file_path = tmp_path / runner.LOCK_FILE_NAME
    with open(lock_file_path, "w+", encoding="utf-8") as holder:
        fcntl.flock(holder.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
        # Deliberately write nothing: the file stays empty.
        holder.flush()
        # Lock acquisition still fails gracefully on the empty file.
        with runner.ensure_single_execution(config_dir) as lock:
            assert lock.exit_code == 1
    captured = capfd.readouterr()
    assert "Another Home Assistant instance is already running!" in captured.err
    assert "Unable to read lock file details." in captured.err
def test_ensure_single_execution_with_timezone(
    tmp_path: Path, capfd: pytest.CaptureFixture[str]
) -> None:
    """Test handling of lock file with timezone info (edge case)."""
    config_dir = str(tmp_path)
    lock_file_path = tmp_path / runner.LOCK_FILE_NAME
    # Note: the production code never writes timezone-aware timestamps;
    # this exercises the fallback formatting for an older/foreign file.
    with open(lock_file_path, "w+", encoding="utf-8") as holder:
        fcntl.flock(holder.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
        holder.write(
            json.dumps(
                {
                    "pid": 54321,
                    "version": 1,
                    "ha_version": "2025.2.0",
                    # Started 2 hours ago.
                    "start_ts": time.time() - 7200,
                }
            )
        )
        holder.flush()
        with runner.ensure_single_execution(config_dir) as lock:
            assert lock.exit_code == 1
    captured = capfd.readouterr()
    expected_fragments = (
        "Another Home Assistant instance is already running!",
        "PID: 54321",
        "Version: 2025.2.0",
        "Started: ",
        # fromtimestamp yields a naive datetime, hence the local-time marker.
        "(local time)",
    )
    for fragment in expected_fragments:
        assert fragment in captured.err
def test_ensure_single_execution_with_tz_abbreviation(
    tmp_path: Path, capfd: pytest.CaptureFixture[str]
) -> None:
    """Test handling of lock file when timezone abbreviation is available."""
    config_dir = str(tmp_path)
    lock_file_path = tmp_path / runner.LOCK_FILE_NAME
    with open(lock_file_path, "w+", encoding="utf-8") as holder:
        fcntl.flock(holder.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
        holder.write(
            json.dumps(
                {
                    "pid": 98765,
                    "version": 1,
                    "ha_version": "2025.3.0",
                    # Started 30 minutes ago.
                    "start_ts": time.time() - 1800,
                }
            )
        )
        holder.flush()
        # strftime("%Z") is OS-specific (often empty for naive datetimes),
        # so force a timezone abbreviation through a mocked datetime.
        fixed_formats = {
            "%Z": "PST",
            "%Y-%m-%d %H:%M:%S": "2025-09-03 10:30:45",
        }
        mock_dt = MagicMock()
        mock_dt.strftime.side_effect = lambda fmt: fixed_formats.get(
            fmt, "2025-09-03 10:30:45 PST"
        )
        with patch("homeassistant.runner.datetime") as mock_datetime:
            mock_datetime.fromtimestamp.return_value = mock_dt
            with runner.ensure_single_execution(config_dir) as lock:
                assert lock.exit_code == 1
    captured = capfd.readouterr()
    assert "Another Home Assistant instance is already running!" in captured.err
    assert "PID: 98765" in captured.err
    assert "Version: 2025.3.0" in captured.err
    assert "Started: 2025-09-03 10:30:45 PST" in captured.err
    # No "(local time)" marker when a timezone abbreviation is present.
    assert "(local time)" not in captured.err
def test_ensure_single_execution_file_not_unlinked(tmp_path: Path) -> None:
    """Test that lock file is never unlinked to avoid race conditions."""
    config_dir = str(tmp_path)
    lock_file_path = tmp_path / runner.LOCK_FILE_NAME
    # First run creates the lock file; remember its inode.
    with runner.ensure_single_execution(config_dir) as lock:
        assert lock.exit_code is None
        assert lock_file_path.exists()
        original_inode = lock_file_path.stat().st_ino
    # Still present after the context exits, and still the same file.
    assert lock_file_path.exists()
    assert lock_file_path.stat().st_ino == original_inode
    # A second run locks the very same file rather than recreating it.
    with runner.ensure_single_execution(config_dir) as lock:
        assert lock.exit_code is None
        assert lock_file_path.exists()
        assert lock_file_path.stat().st_ino == original_inode
    # Same inode after the second run too.
    assert lock_file_path.exists()
    assert lock_file_path.stat().st_ino == original_inode
def test_ensure_single_execution_sequential_runs(tmp_path: Path) -> None:
    """Test that sequential runs work correctly after lock is released."""
    config_dir = str(tmp_path)
    lock_file_path = tmp_path / runner.LOCK_FILE_NAME
    with runner.ensure_single_execution(config_dir) as lock:
        assert lock.exit_code is None
        assert lock_file_path.exists()
        first_data = json.loads(lock_file_path.read_text(encoding="utf-8"))
    # The lock file survives the first run (never unlinked).
    assert lock_file_path.exists()
    # Tiny delay so the second run records a strictly newer timestamp.
    time.sleep(0.00001)
    with runner.ensure_single_execution(config_dir) as lock:
        assert lock.exit_code is None
        assert lock_file_path.exists()
        second_data = json.loads(lock_file_path.read_text(encoding="utf-8"))
        assert second_data["pid"] == os.getpid()
        assert second_data["start_ts"] > first_data["start_ts"]
    # The lock file survives the second run as well.
    assert lock_file_path.exists()