ecu-tests/ecu_framework/config/loader.py

"""
Configuration loader: YAML + environment + in-memory overrides → typed dataclasses.

Design at a glance
==================

The loader is a small pipeline:

    defaults (dict)
        └─▶ merge ONE main YAML: the file at $ECU_TESTS_CONFIG (if env var set & file exists),
            otherwise workspace_root/config/test_config.yaml   (if exists)
                └─▶ merge PSU side-channel YAML (env OWON_PSU_CONFIG or workspace_root/config/owon_psu.yaml)
                        └─▶ merge in-memory overrides (caller-supplied)
                                └─▶ coerce types & build EcuTestConfig dataclass

The "merge" step is a recursive dict update (see ``_deep_update``). Nested dicts
combine key-by-key; everything else is replaced wholesale. The final ``_to_dataclass``
step does *defensive* type coercion — YAML happily produces strings where ints are
expected, so we cast at the boundary rather than trusting the parser.

Why this shape
==============

- **Two layers (dict → dataclass).** The merge happens at the dict layer because
  ``_deep_update`` is dict-shaped and easy to reason about. The dataclass layer is
  the *public* contract callers use. Keeping these separate means the merge
  semantics don't leak into consumer code.
- **PSU side-channel.** Serial port settings are bench-specific and shouldn't be
  committed alongside test config. The optional ``owon_psu.yaml`` (or
  ``$OWON_PSU_CONFIG``) lets users keep them out of version control while still
  participating in the precedence stack.
- **In-memory overrides last.** The docstring of ``load_config`` lists overrides
  as precedence #1 (highest). In the code they're applied *last* — that's exactly
  what "highest precedence" means in a sequential-merge model: the last writer wins.

Known minor wart
================

Defaults live in two places: as dataclass field defaults (e.g. ``type: str = "mock"``)
*and* in the ``base`` dict inside ``load_config``. Both must agree, and a drift
between them would be silently wrong. The base dict exists because the merge step
needs a starting dict; the dataclass defaults exist because callers may construct
configs directly without going through ``load_config``. If a third caller path
appears, consider extracting defaults to a single ``DEFAULTS`` mapping.
"""
from __future__ import annotations  # PEP 563: makes type annotations strings, so forward references like the one in EcuTestConfig.power_supply don't require reordering definitions.

import os  # Environment variables (ECU_TESTS_CONFIG, OWON_PSU_CONFIG) and filesystem checks
import pathlib  # Cross-platform path handling; preferred over os.path for new code
from dataclasses import dataclass, field  # field(default_factory=...) is required for any mutable default (dict, list, nested dataclass)
from typing import Any, Dict, Optional  # Any is used at the YAML boundary where we can't promise more

import yaml  # PyYAML; we only ever use safe_load — never load() — because YAML can be a code-execution vector


# ---------------------------------------------------------------------------
# Dataclass schema
# ---------------------------------------------------------------------------
# These four dataclasses are the public contract: anything outside this module
# that wants to know "what is configurable" reads them. Surfacing a new option
# takes two touches: add the field here, and add a matching coercion line in
# _to_dataclass() below. Without the coercion line the loader never copies the
# value out of the merged dict, so the field silently keeps its dataclass default.

@dataclass
class FlashConfig:
    """Flashing-related configuration.

    Attributes:
        enabled: Whether to trigger ECU flashing at session start. Default off
            so unit/mock runs never touch hardware.
        hex_path: Path to the firmware HEX file. ``None`` means "no flashing
            possible even if enabled is True" — callers must check.
    """

    # Opt-in switch: flashing stays off unless a config source enables it.
    enabled: bool = False
    # No firmware image is configured by default.
    hex_path: Optional[str] = None


@dataclass
class InterfaceConfig:
    """LIN interface configuration — covers all three adapter types in one schema.

    Fields are grouped by which adapter consumes them; fields not relevant to the
    selected ``type`` are simply ignored at runtime. Keeping them in one dataclass
    (rather than a per-adapter union) means YAML files don't need to change shape
    when you switch between mock / MUM / BabyLIN.

    Attributes:
        type: Adapter selector — ``"mock"`` (no hardware), ``"mum"`` (Melexis
            Universal Master — the current hardware path), or ``"babylin"``
            (DEPRECATED — kept only so existing rigs keep working).
            Default ``"mock"``.
        channel: BabyLIN channel index (0-based). Ignored by MUM and mock.
            Default ``1``.
        bitrate: Effective LIN bitrate in bit/s. The MUM applies it directly;
            BabyLIN typically takes it from the SDF, so this field is
            informational in that case. Default ``19200``.
        dll_path: DEPRECATED. Pointer to vendor DLLs from the old ctypes-based
            BabyLIN adapter. The SDK wrapper does not use this.
        node_name: Optional friendly identifier for logs/reports.
        func_names: DEPRECATED. Was a remapping table for the ctypes adapter's
            function names; ignored by the SDK wrapper.
        sdf_path: DEPRECATED (BabyLIN). Path to the SDF that BabyLIN loads
            on connect. Required for typical BabyLIN operation.
        schedule_nr: DEPRECATED (BabyLIN). Schedule index to start after
            connect. ``-1`` means "do not start any schedule". Note that the
            default is ``0``, not ``-1``.
        host: MUM IP address (MUM only). Required when ``type == "mum"``.
            The MUM's USB-RNDIS default is ``192.168.7.2``.
        lin_device: MUM LIN device name. Default ``"lin0"`` matches MUM
            firmware conventions.
        power_device: MUM power-control device name. Default ``"power_out0"``
            is the standard MUM power-out channel.
        boot_settle_seconds: Sleep after MUM power-up before the master sends
            its first frame. Tuning this avoids brown-outs on slow-booting ECUs.
            Default ``0.5``.
        frame_lengths: ``{frame_id: data_length}`` map used by the MUM to know
            how many bytes to read from slave-published frames. Keys may be
            written as hex strings in YAML (``0x0A``) — see _to_dataclass().
        ldf_path: Optional path to an LDF file. When set, an ``ldf`` fixture
            can expose an ``LdfDatabase`` for ``pack``/``unpack``, and the MUM
            adapter auto-merges frame lengths from the LDF. Relative paths
            resolve against the workspace root.
    """

    # Adapter selector.
    type: str = "mock"
    # Original (BabyLIN-era) field group. Not every field here is
    # BabyLIN-specific: per the attribute docs above, ``bitrate`` is applied by
    # the MUM and ``node_name`` is a general label. They keep this position
    # because dataclass field order defines the positional constructor signature.
    channel: int = 1
    bitrate: int = 19200
    dll_path: Optional[str] = None
    node_name: Optional[str] = None
    # default_factory gives every instance its own dict (no shared mutable default).
    func_names: Dict[str, str] = field(default_factory=dict)
    sdf_path: Optional[str] = None
    schedule_nr: int = 0
    # MUM-only knobs
    host: Optional[str] = None
    lin_device: str = "lin0"
    power_device: str = "power_out0"
    boot_settle_seconds: float = 0.5
    # MUM frame-length hints (and LDF override target)
    frame_lengths: Dict[int, int] = field(default_factory=dict)
    # LDF integration — shared by tests + MUM adapter
    ldf_path: Optional[str] = None


@dataclass
class EcuTestConfig:
    """Top-level typed configuration container.

    This is what ``load_config()`` returns and what most fixtures/tests
    type-annotate against. New top-level config groups (e.g. a future
    "reporting" section) get added here as a new ``field()``.

    Every section uses ``default_factory``, so a bare ``EcuTestConfig()``
    builds fresh sub-config instances and no two configs share mutable state.

    Note on field ordering:
        ``power_supply`` is annotated as the string ``"PowerSupplyConfig"``
        and uses a lambda default_factory because ``PowerSupplyConfig`` is
        defined *below* this class. The ``from __future__ import annotations``
        import at the top of the module turns all annotations into strings,
        and the lambda defers the name lookup until ``EcuTestConfig()`` is
        actually instantiated — by which point ``PowerSupplyConfig`` exists
        in the module namespace. This lets us keep ``EcuTestConfig`` at the
        top as the "main" type readers see first.
    """

    # Both classes are already defined above, so they can be passed directly
    # as factories.
    interface: InterfaceConfig = field(default_factory=InterfaceConfig)
    flash: FlashConfig = field(default_factory=FlashConfig)
    # Forward reference resolved at instantiation time — see the ordering note
    # in the class docstring. Do not simplify the lambda to
    # ``default_factory=PowerSupplyConfig``: that name does not exist yet while
    # this class body is being executed.
    power_supply: "PowerSupplyConfig" = field(default_factory=lambda: PowerSupplyConfig())


@dataclass
class PowerSupplyConfig:
    """Serial power supply (Owon) configuration.

    Defined after ``EcuTestConfig`` deliberately so the most-used type appears
    at the top of the file; see the ordering note in ``EcuTestConfig``.

    The defaults below are repeated in the ``power_supply`` section of the
    built-in defaults dict inside ``load_config``; change both together.

    Attributes:
        enabled: Master switch — when False, PSU-dependent tests skip and
            ``owon_psu`` helpers no-op rather than open a serial port.
        port: Serial device. Windows-style (``COM4``) or POSIX-style
            (``/dev/ttyUSB0``); the cross-platform resolver in
            ``ecu_framework.power`` normalizes between them.
        baudrate / timeout / eol: Standard line settings. ``eol`` is either
            ``"\\n"`` or ``"\\r\\n"`` depending on the device firmware.
        parity / stopbits: Standard serial framing knobs.
        xonxoff / rtscts / dsrdtr: Flow-control flags; most Owon units want
            all three off.
        idn_substr: If set, the PSU helper will assert that the response to
            ``*IDN?`` contains this substring before proceeding — guards
            against picking up the wrong device on a multi-COM bench.
        do_set / set_voltage / set_current: Convenience knobs for the demo
            and smoke tests; production test cases drive the PSU directly.
    """

    # Master switch and device selection.
    enabled: bool = False
    port: Optional[str] = None
    # Serial line settings.
    baudrate: int = 115200
    timeout: float = 1.0
    eol: str = "\n"
    parity: str = "N"        # one of "N", "E", "O"
    stopbits: float = 1.0    # 1 or 2 (float, since pyserial accepts 1.5 for some chips)
    # Flow control: all off by default.
    xonxoff: bool = False
    rtscts: bool = False
    dsrdtr: bool = False
    # Optional identity check; None disables it.
    idn_substr: Optional[str] = None
    # Demo / smoke-test set-point knobs.
    do_set: bool = False
    set_voltage: float = 1.0
    set_current: float = 0.1


# ---------------------------------------------------------------------------
# Public constants
# ---------------------------------------------------------------------------
# Surface as part of the public API so callers can override paths consistently
# (e.g., a custom CLI tool that wants to read the same env var as the loader).

# Location of the main YAML relative to ``workspace_root``; consulted only when
# the environment variable below does not resolve to an existing file.
DEFAULT_CONFIG_RELATIVE = pathlib.Path("config", "test_config.yaml")
# Name of the environment variable holding an absolute or relative path to a
# YAML file. Takes priority over DEFAULT_CONFIG_RELATIVE but still loses to
# in-memory overrides.
ENV_CONFIG_PATH = "ECU_TESTS_CONFIG"


# ---------------------------------------------------------------------------
# Internal merge helper
# ---------------------------------------------------------------------------

def _deep_update(base: Dict[str, Any], updates: Dict[str, Any]) -> Dict[str, Any]:
    """Overlay ``updates`` onto ``base`` in place and hand ``base`` back.

    Rules, applied per key of ``updates``:
        - dict meets dict: descend and merge, so an overlay can change one
          nested key without restating (or wiping) its siblings.
        - anything else (scalar, list, ``None``, or a dict meeting a non-dict):
          the incoming value is stored as-is, replacing whatever ``base`` held.
          Lists are therefore replaced, never concatenated.

    Args:
        base: Mapping that receives the changes; it is mutated.
        updates: Mapping whose entries are layered on top; it is only read.

    Returns:
        The very same ``base`` object, which lets callers chain or reassign.

    Note:
        Replaced values are stored by reference, not copied, so a nested dict
        taken from ``updates`` is shared with ``base`` afterwards.
    """
    for key, incoming in updates.items():
        current = base.get(key)
        mergeable = isinstance(incoming, dict) and isinstance(current, dict)
        if not mergeable:
            # Replace wholesale: no structure on one side (or both) to merge into.
            base[key] = incoming
            continue
        # Two mappings: merge one level down so sibling keys survive.
        base[key] = _deep_update(current, incoming)
    return base


# ---------------------------------------------------------------------------
# Dict → dataclass coercion
# ---------------------------------------------------------------------------

# String spellings accepted for boolean options: the YAML 1.1 boolean words plus
# "1"/"0". The empty string stays falsy, matching what ``bool("")`` always did.
_TRUE_STRINGS = frozenset({"1", "true", "yes", "y", "on"})
_FALSE_STRINGS = frozenset({"", "0", "false", "no", "n", "off"})


def _coerce_bool(value: Any, name: str) -> bool:
    """Interpret ``value`` as a boolean, parsing string spellings explicitly.

    ``bool("false")`` is ``True`` in Python because any non-empty string is
    truthy, so quoted YAML booleans must be parsed rather than cast.

    Args:
        value: Raw value from the merged config dict.
        name: Dotted config key, used only in the error message.

    Returns:
        The parsed boolean. Non-string values go through plain ``bool()``.

    Raises:
        ValueError: If ``value`` is a string that is not a recognised boolean
            spelling.
    """
    if isinstance(value, str):
        token = value.strip().lower()
        if token in _TRUE_STRINGS:
            return True
        if token in _FALSE_STRINGS:
            return False
        raise ValueError(
            f"Cannot interpret {value!r} as a boolean for config key {name!r}"
        )
    return bool(value)


def _to_dataclass(cfg: Dict[str, Any]) -> EcuTestConfig:
    """Convert a merged plain-dict config into strongly-typed dataclasses.

    Why defensive casting:
        YAML's type inference is generous — a value that *looks* like a number
        may come through as a string (e.g. when the user quotes ``"19200"``)
        and a bool may come through as the string ``"true"``. Rather than
        propagate that fuzziness, we cast at this boundary so downstream code
        gets the types it actually annotated against. Casts that fail raise,
        which is the right behavior: a config that can't be interpreted is a
        bug to surface early.

    Args:
        cfg: Merged configuration dict with optional ``interface``, ``flash``
            and ``power_supply`` sections. A section that is missing or null
            (e.g. a bare ``flash:`` line in YAML) is treated as empty, so every
            field in it falls back to its default.

    Returns:
        A fully populated ``EcuTestConfig``.

    Raises:
        ValueError: A numeric field holds text that does not parse, or a
            boolean field holds a string that is not a recognised spelling.
        TypeError: A numeric field holds a value ``int()``/``float()`` cannot
            accept (for example an explicit null).

    Notes on specific fields:
        - ``type`` is lowercased so YAML like ``"MUM"`` or ``"Mock"`` works.
        - Boolean fields accept real booleans and the strings
          ``true/false``, ``yes/no``, ``y/n``, ``on/off``, ``1/0`` in any case.
          A plain ``bool()`` cast would turn ``"false"`` into ``True``.
        - ``frame_lengths`` keys are parsed with ``int(k, 0)`` when the key
          is a string. The ``0`` base means "infer from prefix": ``"0x0A"``
          parses as hex, ``"10"`` as decimal. Invalid keys are skipped
          silently rather than failing the whole load — a typo in one frame
          shouldn't abort startup.
        - Path-like and free-text optionals (``dll_path``, ``node_name``,
          ``sdf_path``, ``host``, ``ldf_path``, ``hex_path``, ``port``,
          ``idn_substr``) are passed through without coercion.
    """
    # ``or {}`` covers both a missing section and one that YAML parsed as null;
    # the latter would otherwise fail on the first ``.get``.
    iface = cfg.get("interface") or {}
    flash = cfg.get("flash") or {}
    psu = cfg.get("power_supply") or {}

    # ---- frame_lengths key coercion ----
    # Goal: accept both ``0x0A: 8`` (YAML hex int) and ``"0x0A": 8`` (string-keyed
    # because some YAML writers quote keys). int(k, 0) handles the string form;
    # skipping bad entries is intentional (see docstring).
    raw_fl = iface.get("frame_lengths", {}) or {}
    frame_lengths: Dict[int, int] = {}
    if isinstance(raw_fl, dict):
        for k, v in raw_fl.items():
            try:
                key = int(k, 0) if isinstance(k, str) else int(k)
                frame_lengths[key] = int(v)
            except (TypeError, ValueError):
                # Bad entry — skip silently so one typo doesn't break startup.
                continue

    return EcuTestConfig(
        interface=InterfaceConfig(
            type=str(iface.get("type", "mock")).lower(),
            channel=int(iface.get("channel", 1)),
            bitrate=int(iface.get("bitrate", 19200)),
            dll_path=iface.get("dll_path"),
            node_name=iface.get("node_name"),
            # Copy so the dataclass never aliases a dict owned by the caller.
            func_names=dict(iface.get("func_names", {}) or {}),
            sdf_path=iface.get("sdf_path"),
            schedule_nr=int(iface.get("schedule_nr", 0)),
            host=iface.get("host"),
            lin_device=str(iface.get("lin_device", "lin0")),
            power_device=str(iface.get("power_device", "power_out0")),
            boot_settle_seconds=float(iface.get("boot_settle_seconds", 0.5)),
            frame_lengths=frame_lengths,
            ldf_path=iface.get("ldf_path"),
        ),
        flash=FlashConfig(
            enabled=_coerce_bool(flash.get("enabled", False), "flash.enabled"),
            hex_path=flash.get("hex_path"),
        ),
        power_supply=PowerSupplyConfig(
            enabled=_coerce_bool(psu.get("enabled", False), "power_supply.enabled"),
            port=psu.get("port"),
            baudrate=int(psu.get("baudrate", 115200)),
            timeout=float(psu.get("timeout", 1.0)),
            eol=str(psu.get("eol", "\n")),
            parity=str(psu.get("parity", "N")),
            stopbits=float(psu.get("stopbits", 1.0)),
            xonxoff=_coerce_bool(psu.get("xonxoff", False), "power_supply.xonxoff"),
            rtscts=_coerce_bool(psu.get("rtscts", False), "power_supply.rtscts"),
            dsrdtr=_coerce_bool(psu.get("dsrdtr", False), "power_supply.dsrdtr"),
            idn_substr=psu.get("idn_substr"),
            do_set=_coerce_bool(psu.get("do_set", False), "power_supply.do_set"),
            set_voltage=float(psu.get("set_voltage", 1.0)),
            set_current=float(psu.get("set_current", 0.1)),
        ),
    )


# ---------------------------------------------------------------------------
# Public entry point
# ---------------------------------------------------------------------------

def _first_existing_file(*candidates: Optional[pathlib.Path]) -> Optional[pathlib.Path]:
    """Return the first candidate that is an existing regular file, else ``None``.

    ``None`` entries are skipped so callers can pass "not configured" slots
    straight through. Later candidates are not touched once one matches.
    """
    for candidate in candidates:
        if candidate is not None and candidate.is_file():
            return candidate
    return None


def _read_yaml_mapping(path: pathlib.Path) -> Dict[str, Any]:
    """Parse the YAML file at ``path`` and return its top-level mapping.

    An empty file (``safe_load`` returns ``None``) and a top-level scalar or
    list both yield ``{}``: neither is a valid config shape, and an empty
    overlay is a no-op for the merge.
    """
    with open(path, "r", encoding="utf-8") as f:
        data = yaml.safe_load(f)
    # The file is closed before any merging happens.
    return data if isinstance(data, dict) else {}


def load_config(workspace_root: Optional[str] = None, overrides: Optional[Dict[str, Any]] = None) -> EcuTestConfig:
    """Load configuration from defaults, YAML files, and in-memory overrides.

    Args:
        workspace_root: Repository root used to resolve the default config path
            (``<workspace_root>/config/test_config.yaml``) and the optional
            PSU YAML (``<workspace_root>/config/owon_psu.yaml``). When
            ``None``, those file lookups are skipped — useful for unit tests
            that want to drive the loader purely from ``overrides``. Files
            named by environment variables are still honoured in that case.
        overrides: An optional dict applied last. Use this from tests that
            need to flip a single value without writing a YAML file. It is
            read, not modified.

    Returns:
        A fully-populated ``EcuTestConfig``. Never returns ``None``; missing
        sources fall back to defaults rather than failing.

    Raises:
        yaml.YAMLError: If a resolved YAML file is not valid YAML.
        OSError: If a resolved YAML file cannot be read.
        ValueError, TypeError: Propagated from the final type coercion when a
            merged value cannot be interpreted.

    Precedence (highest wins):
        1. ``overrides``                   (in-memory)
        2. PSU side-channel YAML, merged into the ``power_supply`` section
           only: ``$OWON_PSU_CONFIG`` if it names an existing file, otherwise
           ``workspace_root/config/owon_psu.yaml``. The file's top-level keys
           are the ``power_supply`` fields themselves.
        3. Main YAML — exactly one file is loaded: ``$ECU_TESTS_CONFIG`` if it
           names an existing file, otherwise
           ``workspace_root/config/test_config.yaml``.
        4. Built-in defaults

        In the implementation below, the steps are *applied* in the reverse
        order (lowest first, highest last) because each merge replaces values
        from the previous one — so the *last* writer wins, which is by design
        the *highest*-precedence source.
    """
    # 4) Built-in defaults — the floor everything else builds on.
    # NOTE: these duplicate the dataclass field defaults. See the module docstring's
    # "Known minor wart" section for why and what to do if a third caller path appears.
    base: Dict[str, Any] = {
        "interface": {
            "type": "mock",
            "channel": 1,
            "bitrate": 19200,
        },
        "flash": {
            "enabled": False,
            "hex_path": None,
        },
        "power_supply": {
            "enabled": False,
            "port": None,
            "baudrate": 115200,
            "timeout": 1.0,
            "eol": "\n",
            "parity": "N",
            "stopbits": 1.0,
            "xonxoff": False,
            "rtscts": False,
            "dsrdtr": False,
            "idn_substr": None,
            "do_set": False,
            "set_voltage": 1.0,
            "set_current": 0.1,
        },
    }

    # An empty or missing workspace_root disables every workspace-relative lookup.
    root: Optional[pathlib.Path] = pathlib.Path(workspace_root) if workspace_root else None

    # 3) Main YAML. The env var is tried first; a path that does not exist is
    # treated as "no env override" rather than an error, so CI environments
    # can keep the variable set unconditionally. Only one file is merged.
    env_path = os.getenv(ENV_CONFIG_PATH)
    cfg_path = _first_existing_file(
        pathlib.Path(env_path) if env_path else None,
        root / DEFAULT_CONFIG_RELATIVE if root is not None else None,
    )
    if cfg_path is not None:
        _deep_update(base, _read_yaml_mapping(cfg_path))

    # 2) PSU side-channel.
    # Why a side-channel: bench-specific serial port settings (COM4 vs
    # /dev/ttyUSB0, baudrate quirks, IDN substring) should usually NOT live
    # in the committed test config. Splitting them into their own file lets
    # users gitignore ``config/owon_psu.yaml`` while still committing
    # ``config/test_config.yaml``. Resolution follows the same rule as the
    # main config: an env var that points at a missing file falls back to
    # the workspace file instead of suppressing it.
    psu_env = os.getenv("OWON_PSU_CONFIG")
    psu_path = _first_existing_file(
        pathlib.Path(psu_env) if psu_env else None,
        root / "config" / "owon_psu.yaml" if root is not None else None,
    )
    if psu_path is not None:
        psu_cfg = _read_yaml_mapping(psu_path)
        if psu_cfg:
            # The main YAML may have replaced the section with a non-dict
            # (e.g. a bare ``power_supply:`` parses to None); start from an
            # empty section in that case so the merge has a dict to write into.
            section = base.get("power_supply")
            if not isinstance(section, dict):
                section = {}
                base["power_supply"] = section
            _deep_update(section, psu_cfg)

    # 1) In-memory overrides — applied LAST so they win over all file sources.
    if overrides:
        _deep_update(base, overrides)

    # Final step: cast the merged dict into typed dataclasses for callers.
    return _to_dataclass(base)