test(validation): negative load-error checks; keep run logs out of the repo

load_errors_check.py loads deliberately broken .tum fixtures in batch on the build under test (like lsp_check.py) and asserts each fails with its specific located message and without a raw traceback. Wired into run.sh just before the main suite, so it runs for every channel. The run validation items now point their sub-instance log at the gitignored report dir, so a GUI run no longer litters the tree with sub_*.log files. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-15 14:40:24 +02:00
parent 1ea360e5a5
commit 7abd8c07a6
11 changed files with 153 additions and 0 deletions
--- a/test/validation/items/run/test.tum
+++ b/test/validation/items/run/test.tum
@@ -2,24 +2,31 @@
 # In batch mode the sub-instance runs with -b; in GUI mode with -r.
 # The run item result is SUCCESS if the sub-instance launched successfully,
 # regardless of its own test result.
 #
 # log_file points the sub-instance log at the throwaway report dir (gitignored)
 # so a GUI run does not litter the repo with sub_*.log files.
 - run:
    name: run PASS (valid file, passing sub-test)
    key: $(test)_PASS
    tum: $(test_path)$(psep)sub_pass.tum
    log_file: $(validation_report_path)$(psep)run_sub.log
 - run:
    name: run PASS (valid file, failing sub-test)
    key: $(test)_PASS
    tum: $(test_path)$(psep)sub_fail.tum
    log_file: $(validation_report_path)$(psep)run_sub.log
 - run:
    name: run FAIL (file not found)
    key: $(test)_FAIL
    tum: $(test_path)$(psep)non_existent.tum
    log_file: $(validation_report_path)$(psep)run_sub.log
 - run:
    name: run FAIL (wait_for_exec without time window)
    key: $(test)_FAIL
    tum: $(test_path)$(psep)sub_pass.tum
    wait_for_exec: true
    log_file: $(validation_report_path)$(psep)run_sub.log
--- a/test/validation/load_errors/bad_include.tum
+++ b/test/validation/load_errors/bad_include.tum
@@ -0,0 +1,9 @@
 main:
  name: root
  steps:
    - sleep:
        name: ok
        timeout: 0
    # The structural error lives in the included file; the message must point
    # the user at that file, not at this one.
    - !include bad_include_inc.tum
--- a/test/validation/load_errors/bad_include_inc.tum
+++ b/test/validation/load_errors/bad_include_inc.tum
@@ -0,0 +1,4 @@
 # Included as a bare list of steps. The unknown item below must be reported
 # with THIS file as the location.
 - frobnicate_in_include:
    name: nope
--- a/test/validation/load_errors/group_no_steps.tum
+++ b/test/validation/load_errors/group_no_steps.tum
@@ -0,0 +1,6 @@
 main:
  name: root
  steps:
    # A container item (group) without its mandatory 'steps:' list.
    - group:
        name: g
--- a/test/validation/load_errors/scalar_body.tum
+++ b/test/validation/load_errors/scalar_body.tum
@@ -0,0 +1,5 @@
 main:
  name: root
  steps:
    # The body of an item must be a mapping of parameters, not a scalar.
    - sleep: 5
--- a/test/validation/load_errors/step_not_mapping.tum
+++ b/test/validation/load_errors/step_not_mapping.tum
@@ -0,0 +1,5 @@
 main:
  name: root
  steps:
    # A step that is a bare scalar instead of a '<item>: ...' mapping.
    - just some text
--- a/test/validation/load_errors/two_steps.tum
+++ b/test/validation/load_errors/two_steps.tum
@@ -0,0 +1,11 @@
 main:
  name: root
  steps:
    # Two items wrongly packed under a single '-' marker (a frequent indent
    # mistake): the second key should start its own '-' list entry.
    - sleep:
        name: s
        timeout: 0
      group:
        name: g
        steps: []
--- a/test/validation/load_errors/unknown_action.tum
+++ b/test/validation/load_errors/unknown_action.tum
@@ -0,0 +1,8 @@
 main:
  name: root
  steps:
    - console:
        console_name: c1
        steps:
          - opens:
              device: /dev/ttyUSB0
--- a/test/validation/load_errors/unknown_item.tum
+++ b/test/validation/load_errors/unknown_item.tum
@@ -0,0 +1,5 @@
 main:
  name: root
  steps:
    - frobnicate:
        name: nope
--- a/test/validation/load_errors_check.py
+++ b/test/validation/load_errors_check.py
@@ -0,0 +1,87 @@
 #!/usr/bin/env python3
 """Per-channel check of test-load error reporting.
 Given the channel's testium invocation as argv (e.g. ``flatpak run
 --command=testium org.testium.Testium``, a PyInstaller binary path, or
 ``python -m testium``), load each deliberately broken ``.tum`` under
 ``load_errors/`` in batch mode and verify that:
  1. the load FAILS (non-zero exit), and
  2. the output carries the *specific, located* message we expect — not a bare
     Python traceback and not the generic 'crashed for any reason'.
 This guards the load-time error handling in ``test_set.load_test_recursively``
 and ``item_actions.load`` (a structural mistake in a ``.tum`` must always reach
 the user as a readable ``TUM file syntax error`` naming the offending file,
 item path and value). The historical failure mode was an unknown console
 action crashing the error formatter itself with ``'dict_keys' object is not
 subscriptable``.
 Exits non-zero (with a diagnostic) on the first failure so the validation run
 fails loudly. Used by ``run.sh`` before launching the main suite.
 """
 import os
 import re
 import subprocess
 import sys
 # Directory containing this script; the fixtures sit in a sub-directory of it.
 HERE = os.path.dirname(os.path.abspath(__file__))
 FIXTURES = os.path.join(HERE, "load_errors")
 # testium colourises its log, so ANSI colour escapes are stripped from the
 # captured output before any substring is searched for.
 _ANSI = re.compile(r"\x1b\[[0-9;]*m")
 # One (fixture file, needles) pair per case: every needle has to be present in
 # the output produced when that fixture is loaded.
 CASES = [
    (
        "unknown_item.tum",
        [
            "TUM file syntax error",
            "is not a known test item",
            "frobnicate",
            "Known items:",
        ],
    ),
    (
        "unknown_action.tum",
        [
            "unknown action",
            "opens",
            "Known actions:",
        ],
    ),
    (
        "two_steps.tum",
        [
            "must define exactly one test item",
        ],
    ),
    (
        "scalar_body.tum",
        [
            "body of test item 'sleep'",
            "must be a mapping",
        ],
    ),
    (
        "group_no_steps.tum",
        [
            "No 'steps' list found",
            "'group' item 'g'",
        ],
    ),
    (
        "step_not_mapping.tum",
        [
            "is not a valid test item",
        ],
    ),
    # The error is inside the included file: the message must name that file.
    (
        "bad_include.tum",
        [
            "bad_include_inc.tum",
            "frobnicate_in_include",
            "is not a known test item",
        ],
    ),
 ]
 def fail(msg):
    """Report a failed check on stderr and terminate with exit status 1.

    ``msg`` is appended to the fixed ``LOAD-ERROR CHECK: FAIL`` prefix. This
    function never returns: it always raises ``SystemExit``.
    """
    sys.stderr.write(f"LOAD-ERROR CHECK: FAIL — {msg}\n")
    raise SystemExit(1)
 def check_case(cmd, fixture, needles):
    """Load one broken fixture in batch mode and verify how the failure is reported.

    ``cmd`` is the testium invocation as a list of arguments; ``-b`` and the
    fixture's path under ``FIXTURES`` are appended to it. ``needles`` are the
    substrings that must all be present in the combined stdout/stderr once the
    ANSI colour escapes have been removed.

    ``fail`` is called (ending the process) when the command cannot be run or
    exceeds the 120 s timeout, when the load does not fail, when a raw Python
    traceback shows up in the output, or when any needle is absent. On success
    a one-line OK message is printed.
    """
    fixture_path = os.path.join(FIXTURES, fixture)
    try:
        proc = subprocess.run(
            cmd + ["-b", fixture_path],
            timeout=120,
            capture_output=True,
        )
    except Exception as exc:  # noqa: BLE001
        fail(f"`{' '.join(cmd)} -b {fixture}` could not run: {exc}")
    # Both streams are merged: only the presence of a message matters, not the
    # stream it was written to.
    captured = proc.stdout + proc.stderr
    text = _ANSI.sub("", captured.decode(errors="replace"))
    exit_code = proc.returncode
    load_succeeded = exit_code == 0 or "Test run success." in text
    if load_succeeded:
        reason = f"{fixture}: load was expected to fail but succeeded (exit {exit_code})."
        fail(reason)
    # A raw Python traceback reaching the user is precisely the regression
    # being guarded against: load errors have to surface as a TUM*Error.
    traceback_leaked = "Traceback (most recent call last)" in text
    if traceback_leaked:
        tail = text[-600:]
        fail(f"{fixture}: a raw Python traceback leaked to the user:\n{tail}")
    absent = [needle for needle in needles if needle not in text]
    if absent:
        tail = text[-800:]
        fail(f"{fixture}: load message is missing {absent}.\n--- got ---\n{tail}")
    print(f"LOAD-ERROR CHECK: {fixture} OK")
 def main():
    """Run every case in ``CASES`` against the invocation given on the command line.

    All arguments after the script name form the testium invocation. With no
    arguments a usage message is reported through ``fail``. Cases run in
    order; ``check_case`` ends the process on the first failing one, so the
    final PASS line is printed only when all of them succeed.
    """
    invocation = sys.argv[1:]
    if not invocation:
        fail("usage: load_errors_check.py <testium-invocation...>")
    for fixture_name, expected in CASES:
        check_case(invocation, fixture_name, expected)
    print("LOAD-ERROR CHECK: PASS")
 if __name__ == "__main__":
    main()
--- a/test/validation/run.sh
+++ b/test/validation/run.sh
@@ -158,6 +158,12 @@ echo "-- launch: ${CMD[*]}"
 echo "-- LSP check ($MODE)"
 "$VENV_PYTHON" "$SCRIPT_DIR/lsp_check.py" "${CMD[@]}"
 # ---------- load-error check (this exact channel) -----------------------------
 # Deliberately broken .tum files must fail to load with a specific, located
 # message (not a raw traceback): guards the load-time error handling.
 echo "-- load-error check ($MODE)"
 "$VENV_PYTHON" "$SCRIPT_DIR/load_errors_check.py" "${CMD[@]}"
 if [ "$GUI" -eq 1 ]; then
    echo "-- GUI mode: the suite is loaded; press Start to run. Window stays open."
 fi