Generator + in-process harness timing the real loader's three stages and template/YAML call counts, across tunable profiles. cases/ git-ignored; see test/benchmark/README.md. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
201 lines
7.4 KiB
Python
Executable File
201 lines
7.4 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""Time the testium *load* pipeline on a given ``.tum`` tree.
|
|
|
|
It drives the real loader code (``TestProcess._load_initial_params`` /
|
|
``_load_test`` then ``TestSet(...)``) in-process, so the numbers track the
|
|
production path and stay honest as the code evolves. Execution is never
|
|
triggered — we stop exactly where ``Batch`` would report the test as *loaded*.
|
|
|
|
Reported per run, over ``--repeat`` iterations (min is the headline, least
|
|
noisy):
|
|
|
|
initial first pass: discover config files (template+YAML, no includes)
|
|
loadtest config-file fixpoint loop + full recursive include/template/YAML
|
|
build TestSet construction: the load_test_recursively tree build
|
|
total sum of the three
|
|
|
|
Plus instrumentation counters (exact call counts, wall time) for the two
|
|
hot leaves the optimisation axes target:
|
|
|
|
templates jinja template_to_test() calls (axis 1 compile cache, axis 2 tempfile)
|
|
yaml yaml_load() parses (axis 3 C loader)
|
|
|
|
template time is exclusive (one file render); yaml time is wall-inclusive of
|
|
nested includes, so lean on the *counts* for attribution.
|
|
|
|
Must run inside the project venv (jinja2, pyyaml, telnetlib3, ...). The
|
|
benchmark profiles contain no ``<| |>`` so the external eval process is not
|
|
needed; pass --with-eval to start it for faithfulness on eval-heavy trees.
|
|
|
|
Usage (see run.sh for the convenience wrapper):
|
|
test/tmp/.venv/bin/python3 test/benchmark/load_bench.py [--repeat 5] <main.tum>
|
|
"""
|
|
import argparse
|
|
import os
|
|
import statistics
|
|
import sys
|
|
from queue import Queue
|
|
from time import perf_counter
|
|
|
|
# --- bootstrap: src/testium for flat imports, src for `import testium` --------
|
|
HERE = os.path.dirname(os.path.abspath(__file__))
|
|
ROOT = os.path.abspath(os.path.join(HERE, "..", ".."))
|
|
sys.path.insert(0, os.path.join(ROOT, "src"))
|
|
sys.path.insert(0, os.path.join(ROOT, "src", "testium"))
|
|
|
|
import api.testium as tm # noqa: E402
|
|
from interpreter.utils.test_init import env_init, apply_overrides # noqa: E402
|
|
from interpreter.utils.test_ctrl import TestSetController # noqa: E402
|
|
from interpreter.process import TestProcess # noqa: E402
|
|
from interpreter.test_set import TestSet # noqa: E402
|
|
from interpreter.utils.py_eval import eval_process_init # noqa: E402
|
|
from interpreter.utils.api_srv import api_request # noqa: E402
|
|
|
|
# --- instrumentation: count + time the two hot leaves -------------------------
|
|
import interpreter.process as _proc # noqa: E402
|
|
import interpreter.utils.include as _inc # noqa: E402
|
|
import interpreter.utils.test_init as _ti # noqa: E402
|
|
import interpreter.utils.template as _tpl # noqa: E402
|
|
import interpreter.utils.yaml_load as _yl # noqa: E402
|
|
|
|
_C = {"tpl_n": 0, "tpl_t": 0.0, "yaml_n": 0, "yaml_t": 0.0}
|
|
_orig_tpl = _tpl.template_to_test
|
|
_orig_yaml = _yl.yaml_load
|
|
|
|
|
|
def _wrap_tpl(*a, **k):
|
|
t = perf_counter()
|
|
try:
|
|
return _orig_tpl(*a, **k)
|
|
finally:
|
|
_C["tpl_t"] += perf_counter() - t
|
|
_C["tpl_n"] += 1
|
|
|
|
|
|
def _wrap_yaml(*a, **k):
|
|
t = perf_counter()
|
|
try:
|
|
return _orig_yaml(*a, **k)
|
|
finally:
|
|
_C["yaml_t"] += perf_counter() - t
|
|
_C["yaml_n"] += 1
|
|
|
|
|
|
# rebind in every module that did `from ... import template_to_test / yaml_load`
|
|
for _m in (_proc, _inc):
|
|
_m.template_to_test = _wrap_tpl
|
|
for _m in (_proc, _inc, _ti):
|
|
_m.yaml_load = _wrap_yaml
|
|
|
|
|
|
def _reset_counters():
|
|
_C.update(tpl_n=0, tpl_t=0.0, yaml_n=0, yaml_t=0.0)
|
|
|
|
|
|
def load_once(tp, fname, test_dir):
|
|
"""One full load (no execution). Returns (initial, loadtest, build) seconds."""
|
|
t0 = perf_counter()
|
|
init_pf, gv = tp._load_initial_params(test_dir)
|
|
t1 = perf_counter()
|
|
test_dict, _pf = tp._load_test(init_pf, gv)
|
|
t2 = perf_counter()
|
|
TestSet(fname, test_dict, Queue())
|
|
t3 = perf_counter()
|
|
return (t1 - t0, t2 - t1, t3 - t2)
|
|
|
|
|
|
def main():
|
|
ap = argparse.ArgumentParser(description=__doc__,
|
|
formatter_class=argparse.RawDescriptionHelpFormatter)
|
|
ap.add_argument("main_tum", help="path to the generated main.tum")
|
|
ap.add_argument("--repeat", type=int, default=5)
|
|
ap.add_argument("--with-eval", action="store_true",
|
|
help="start the external eval process (needed only for <| |> at load)")
|
|
ap.add_argument("--quiet", action="store_true",
|
|
help="silence the loader's INFO output during runs")
|
|
args = ap.parse_args()
|
|
|
|
fname = os.path.abspath(args.main_tum)
|
|
if not os.path.isfile(fname):
|
|
ap.error(f"not found: {fname}")
|
|
test_dir = os.path.dirname(fname)
|
|
|
|
env_init()
|
|
apply_overrides({}, {})
|
|
|
|
eval_proc = None
|
|
if args.with_eval:
|
|
eval_proc = eval_process_init(api_request, 10, test_dir)
|
|
eval_proc.start()
|
|
eval_proc.wait_ready(10)
|
|
|
|
if args.quiet:
|
|
# the loader prints a couple of INFO lines per config file; mute stdout
|
|
# around the measured section to avoid I/O skew.
|
|
devnull = open(os.devnull, "w")
|
|
real_stdout = sys.stdout
|
|
|
|
tp = TestProcess(fname, Queue(), TestSetController(),
|
|
config_files=[], defines={}, gui_defaults={}, text_mode=True)
|
|
|
|
samples = [] # list of (initial, loadtest, build)
|
|
last_counters = None
|
|
try:
|
|
for r in range(args.repeat):
|
|
_reset_counters()
|
|
if args.quiet:
|
|
sys.stdout = devnull
|
|
try:
|
|
samples.append(load_once(tp, fname, test_dir))
|
|
except RecursionError:
|
|
if args.quiet:
|
|
sys.stdout = real_stdout
|
|
print(f"file : {fname}")
|
|
print("ERROR : RecursionError during load — the include "
|
|
"nesting is too deep for the recursive loader.\n"
|
|
" (each include level costs ~10 stack frames; "
|
|
"raise sys.setrecursionlimit to probe further.)")
|
|
return 2
|
|
except Exception as e: # noqa: BLE001 - report, don't crash the bench
|
|
if args.quiet:
|
|
sys.stdout = real_stdout
|
|
print(f"file : {fname}")
|
|
print(f"ERROR : load failed: {type(e).__name__}: {e}")
|
|
return 2
|
|
finally:
|
|
if args.quiet:
|
|
sys.stdout = real_stdout
|
|
last_counters = dict(_C)
|
|
finally:
|
|
if eval_proc is not None:
|
|
eval_proc.stop()
|
|
eval_proc.join()
|
|
if args.quiet:
|
|
devnull.close()
|
|
|
|
initial = [s[0] for s in samples]
|
|
loadtest = [s[1] for s in samples]
|
|
build = [s[2] for s in samples]
|
|
total = [sum(s) for s in samples]
|
|
|
|
def stat(xs):
|
|
return min(xs), statistics.median(xs)
|
|
|
|
print(f"file : {fname}")
|
|
print(f"repeats : {args.repeat} (showing min | median, seconds)")
|
|
print(f"{'phase':<10}{'min':>12}{'median':>12}")
|
|
for name, xs in (("initial", initial), ("loadtest", loadtest),
|
|
("build", build), ("total", total)):
|
|
mn, md = stat(xs)
|
|
print(f"{name:<10}{mn:>12.4f}{md:>12.4f}")
|
|
if last_counters:
|
|
print("counters (last run):")
|
|
print(f" templates : {last_counters['tpl_n']:>7d} calls "
|
|
f"{last_counters['tpl_t']:>8.4f}s (exclusive: jinja compile+render+tempfile)")
|
|
print(f" yaml : {last_counters['yaml_n']:>7d} parses "
|
|
f"{last_counters['yaml_t']:>8.4f}s (inclusive of nested includes)")
|
|
|
|
|
|
if __name__ == "__main__":
|
|
sys.exit(main() or 0)
|