diff --git a/src/ropeway/cli.py b/src/ropeway/cli.py
index 7c41e61..4662c8d 100644
--- a/src/ropeway/cli.py
+++ b/src/ropeway/cli.py
@@ -294,6 +294,31 @@ def validate():
     click.secho(f"Overall: {'PASS' if rep.passed else 'FAIL'}", fg=color)


+@main.command()
+@click.argument("prompt", nargs=-1, required=True)
+def ask(prompt):
+    """Phase 13: parse a natural-language ropeway request and run it.
+
+    Example: ``ropeway ask "build a 2 km MGD with seed 7"``
+    """
+    from .nlp import format_summary, parse_prompt, run_from_prompt
+
+    text = " ".join(prompt)
+    parsed = parse_prompt(text)
+    click.echo("Parsed request:")
+    for line in format_summary(parsed).splitlines():
+        click.echo(f" {line}")
+    click.echo()
+    click.echo("Running optimizer on synthetic terrain...")
+    result = run_from_prompt(text)
+    status_color = "green" if result["feasible"] else "red"
+    click.secho(
+        f"Feasible: {result['feasible']}", fg=status_color,
+    )
+    for k, v in result["metrics"].items():
+        click.echo(f" {k}: {v}")
+
+
 @main.command(name="rl-train")
 @click.option("--length", "length_m", type=float, default=3000.0)
 @click.option("--dem", "dem_path", type=click.Path(exists=True, path_type=Path), default=None)
diff --git a/src/ropeway/nlp.py b/src/ropeway/nlp.py
new file mode 100644
index 0000000..f67211e
--- /dev/null
+++ b/src/ropeway/nlp.py
@@ -0,0 +1,307 @@
+"""Phase 13 — natural-language prompt → optimizer run.
+
+A deterministic, dependency-free rule-based parser that extracts the
+shape of a ropeway request from a plain-English prompt and runs the
+optimizer with it. Designed so the CI path needs no API key and no
+network: the rule-based parse is the only path implemented here. An
+LLM-backed fallback is *optional* and lives outside this module; only
+the ``llm_available`` probe (gated on ``ANTHROPIC_API_KEY`` being set)
+is exposed so callers can decide whether to attempt one.
+
+Patterns the parser recognises
+------------------------------
+* **System type** — ``mgd``, ``jigback``/``jig-back``, ``bgd``, ``3s``,
+  ``funitel``, ``chair``/``chairlift``. Also accepts common English
+  paraphrases (``urban gondola`` → MGD, ``aerial tram`` → jigback,
+  ``tri-cable`` → 3S, ``six-pack`` → chairlift).
+* **Corridor length** — ``2 km``, ``2000 m``, ``2.5km``, ``three
+  kilometres``, ``5000 metres``.
+* **Coordinates** — any pair of decimal ``lon, lat`` numbers in plausible
+  WGS84 ranges, separated by comma or space. Two such pairs in a prompt
+  become start + end.
+* **Generations** — ``80 generations``, ``gen 60``, ``--generations 80``.
+* **Seed** — ``seed 42``, ``--seed 7``.
+* **Max towers** — ``up to 10 towers``, ``max-towers 8``, ``8 towers``.
+
+When the corridor or system type falls back to a default, a note is
+added to ``ParsedRequest.notes`` so the caller can show the user what
+was assumed.
+"""
+
+from __future__ import annotations
+
+import os
+import re
+from dataclasses import dataclass, field
+from typing import Optional
+
+from .multi_rope import RopewaySystemType
+
+
+@dataclass
+class ParsedRequest:
+    """Structured view of a natural-language ropeway request."""
+    corridor_length_m: Optional[float] = None
+    system: Optional[RopewaySystemType] = None
+    start_lonlat: Optional[tuple[float, float]] = None
+    end_lonlat: Optional[tuple[float, float]] = None
+    generations: Optional[int] = None
+    seed: Optional[int] = None
+    max_towers: Optional[int] = None
+    notes: list[str] = field(default_factory=list)
+
+    def is_synthetic(self) -> bool:
+        """True when no real coordinates were given — run on synthetic terrain."""
+        return self.start_lonlat is None or self.end_lonlat is None
+
+
+_WORD_NUMS = {
+    "one": 1, "two": 2, "three": 3, "four": 4, "five": 5,
+    "six": 6, "seven": 7, "eight": 8, "nine": 9, "ten": 10,
+}
+
+# Unit alternation shared by the numeric and spelled-out length patterns.
+_LENGTH_UNITS = r"(km|kilomet(?:re|er)s?|m|met(?:re|er)s?)"
+
+_SYSTEM_ALIASES = {
+    # canonical type → list of recognised aliases (lowercased, word-boundary matched)
+    RopewaySystemType.MGD: [
+        "mgd", "monocable detachable gondola", "monocable gondola",
+        "urban gondola", "gondola",
+    ],
+    RopewaySystemType.JIG_BACK: [
+        "jigback", "jig-back", "jig back", "aerial tram", "tram",
+        "pendelbahn",
+    ],
+    RopewaySystemType.BGD: [
+        "bgd", "bi-cable", "bicable", "bi-cable gondola",
+    ],
+    RopewaySystemType.TGD_3S: [
+        "3s", "tri-cable", "tricable", "3-s",
+    ],
+    RopewaySystemType.FUNITEL: [
+        "funitel",
+    ],
+    RopewaySystemType.CHAIRLIFT: [
+        "chair", "chairlift", "chair-lift", "six-pack", "six pack",
+        "detachable chair",
+    ],
+}
+
+
+def _parse_length_to_m(text: str) -> Optional[float]:
+    """Find a corridor length token like '2 km' or '2000 m' in the text."""
+    # numeric form: 2 km, 2.5km, 2000 m, 5000 metres
+    # The leading (?:^|[^\w.]) keeps digits inside a longer token or a
+    # decimal tail (e.g. the '5' of 'v1.5 m') from being read as a length.
+    m = re.search(
+        r"(?:^|[^\w.])(\d+(?:\.\d+)?)\s*" + _LENGTH_UNITS + r"\b",
+        text, re.IGNORECASE,
+    )
+    if m:
+        value = float(m.group(1))
+        is_km = m.group(2).lower().startswith("k")
+        return value * 1000.0 if is_km else value
+    # word form: two kilometres, three km
+    words = "|".join(_WORD_NUMS)
+    m = re.search(
+        r"\b(" + words + r")\s+" + _LENGTH_UNITS + r"\b",
+        text, re.IGNORECASE,
+    )
+    if m:
+        value = float(_WORD_NUMS[m.group(1).lower()])
+        is_km = m.group(2).lower().startswith("k")
+        return value * 1000.0 if is_km else value
+    return None
+
+
+def _parse_system(text: str) -> Optional[RopewaySystemType]:
+    """Return the system type whose longest alias occurs in ``text``."""
+    lower = text.lower()
+    # Longest alias wins so 'bi-cable gondola' (BGD) beats bare 'gondola' (MGD).
+    candidates: list[tuple[int, RopewaySystemType, str]] = []
+    for sys_type, aliases in _SYSTEM_ALIASES.items():
+        for alias in aliases:
+            # Hyphen-aware boundaries: '\b' would split 'jig-back' and
+            # let 'chair' match inside 'chair-lift'.
+            pattern = r"(?:^|[^\w-])" + re.escape(alias) + r"(?![\w-])"
+            if re.search(pattern, lower):
+                candidates.append((len(alias), sys_type, alias))
+    if not candidates:
+        return None
+    return max(candidates, key=lambda c: c[0])[1]
+
+
+def _parse_coords(text: str) -> tuple[Optional[tuple[float, float]],
+                                      Optional[tuple[float, float]]]:
+    """Find up to two (lon, lat) pairs. Accepts ``lon,lat`` or ``lon lat``."""
+    # Match a lon,lat pair: lon ∈ [-180, 180], lat ∈ [-90, 90].
+    # The leading (?:^|[^\d.]) stops a pair from starting in the middle of
+    # a longer number (e.g. '100.5' inside '1100.5').
+    coord_re = re.compile(
+        r"(?:^|[^\d.])(-?\d{1,3}\.\d+)[,\s]+(-?\d{1,3}\.\d+)"
+    )
+    pairs: list[tuple[float, float]] = []
+    for m in coord_re.finditer(text):
+        lon, lat = float(m.group(1)), float(m.group(2))
+        if -180.0 <= lon <= 180.0 and -90.0 <= lat <= 90.0:
+            pairs.append((lon, lat))
+        if len(pairs) >= 2:
+            break
+    if len(pairs) == 0:
+        return None, None
+    if len(pairs) == 1:
+        return pairs[0], None
+    return pairs[0], pairs[1]
+
+
+def _parse_int(text: str, *keywords: str) -> Optional[int]:
+    """Find an integer associated with one of ``keywords``.
+
+    Two patterns are recognised, with **num-before-keyword preferred**
+    so prompts like ``"80 generations, seed 7"`` don't bleed the seed
+    value into the generations field via a greedy post-keyword match.
+    """
+    for kw in keywords:
+        # Prefer 'NUM KW' (e.g. '8 towers', '80 generations').
+        m = re.search(rf"\b(\d+)\s+{re.escape(kw)}\b", text, re.IGNORECASE)
+        if m:
+            return int(m.group(1))
+    for kw in keywords:
+        # Fallback: 'KW NUM' (e.g. 'seed 42', '--seed 42'),
+        # constrained to a short separator so we don't swallow far-away
+        # numbers belonging to a different field.
+        m = re.search(rf"\b{re.escape(kw)}\b[\s:=]{{,3}}(-?\d+)\b",
+                      text, re.IGNORECASE)
+        if m:
+            return int(m.group(1))
+    return None
+
+
+def parse_prompt(prompt: str) -> ParsedRequest:
+    """Parse a natural-language ropeway request into a structured form."""
+    req = ParsedRequest()
+    if not prompt or not prompt.strip():
+        req.notes.append("empty prompt")
+        return req
+
+    req.corridor_length_m = _parse_length_to_m(prompt)
+    req.system = _parse_system(prompt)
+    req.start_lonlat, req.end_lonlat = _parse_coords(prompt)
+    req.generations = _parse_int(prompt, "generations", "gens", "gen")
+    req.seed = _parse_int(prompt, "seed")
+    # Tower keyword first, then '--max-towers'.
+    req.max_towers = _parse_int(prompt, "towers", "max-towers")
+
+    if req.corridor_length_m is None and req.start_lonlat is None:
+        req.notes.append(
+            "no corridor length or coordinates found — defaulting to a "
+            "3000 m synthetic corridor"
+        )
+    if req.system is None:
+        req.notes.append("no system type recognised — defaulting to MGD")
+    return req
+
+
+def format_summary(req: ParsedRequest) -> str:
+    """Human-readable summary of a parsed request.
+
+    Fields are joined with ``" | "`` on one line; any notes follow on a
+    second line.
+    """
+    parts: list[str] = []
+    system_label = (req.system.value if req.system else "mgd (default)")
+    parts.append(f"system: {system_label}")
+    if req.start_lonlat and req.end_lonlat:
+        parts.append(
+            f"corridor: ({req.start_lonlat[0]:.5f}, {req.start_lonlat[1]:.5f}) → "
+            f"({req.end_lonlat[0]:.5f}, {req.end_lonlat[1]:.5f})"
+        )
+    elif req.corridor_length_m:
+        parts.append(f"corridor: {req.corridor_length_m:.0f} m synthetic")
+    else:
+        parts.append("corridor: 3000 m synthetic (default)")
+    if req.generations:
+        parts.append(f"generations: {req.generations}")
+    if req.seed is not None:
+        parts.append(f"seed: {req.seed}")
+    # 'is not None', not truthiness: an explicit '0 towers' (single span)
+    # is a real request and must be shown, mirroring run_from_prompt.
+    if req.max_towers is not None:
+        parts.append(f"max towers: {req.max_towers}")
+    summary = " | ".join(parts)
+    if req.notes:
+        summary += "\n notes: " + "; ".join(req.notes)
+    return summary
+
+
+def run_from_prompt(prompt: str) -> dict:
+    """Parse a prompt and execute the optimizer with the resulting plan.
+
+    Returns a dict carrying the parsed request, a human-readable summary,
+    the feasibility flag, and the headline metrics from the run. Always
+    runs on synthetic terrain: parsed coordinates are reported in the
+    summary but not used to build the profile (DEM-from-coords resolution
+    is intentionally out of scope here — matches the existing FastAPI
+    service's split: synthetic on the public path, DEM upload behind
+    auth).
+    """
+    # Imports are local to keep the parser surface lightweight for callers
+    # that only want `parse_prompt`.
+    from .alignment import evaluate_alignment  # noqa: F401 - sanity
+    from .dem import synthetic_profile
+    from .multi_rope import system_defaults
+    from .optimizer import GAConfig, optimize
+
+    req = parse_prompt(prompt)
+    sys_type = req.system or RopewaySystemType.MGD
+    cfg = system_defaults(sys_type)
+    length = req.corridor_length_m or 3000.0
+    seed = req.seed if req.seed is not None else 42
+    # Keep an explicit 0 (no intermediate towers); only a missing value
+    # falls back to the default of 8.
+    max_towers = req.max_towers if req.max_towers is not None else 8
+    profile = synthetic_profile(length_m=length, seed=seed)
+    ga = GAConfig(
+        max_intermediate_towers=max_towers,
+        generations=req.generations or 60,
+        population_size=80,
+        seed=seed,
+    )
+    result = optimize(profile.as_function(), profile.total_length,
+                      cfg=cfg, ga=ga, verbose=False)
+    rep = result.best_result.report
+    align = result.best_alignment
+    return {
+        "request": req,
+        "summary": format_summary(req),
+        "feasible": result.best_result.feasible,
+        "metrics": {
+            "intermediate_towers": max(0, len(align.towers) - 2),
+            "cable_length_m": rep.total_cable_length_m,
+            "min_clearance_m": rep.min_clearance_m,
+            "max_tension_kn": rep.max_tension_n / 1e3,
+            "max_break_over_deg": rep.max_break_over_deg,
+            "cost": result.best_result.cost,
+        },
+    }
+
+
+def llm_available() -> bool:
+    """True when an Anthropic API key is configured (for the optional path).
+
+    The rule-based ``parse_prompt`` is the primary, fully-deterministic
+    path; this probe lets callers decide whether to attempt an
+    LLM-backed fallback for prompts the rules cannot pin down. CI never
+    exercises this.
+    """
+    return bool(os.getenv("ANTHROPIC_API_KEY"))
+
+
+__all__ = [
+    "ParsedRequest",
+    "parse_prompt",
+    "format_summary",
+    "run_from_prompt",
+    "llm_available",
+]
diff --git a/tests/test_nlp.py b/tests/test_nlp.py
new file mode 100644
index 0000000..12499a5
--- /dev/null
+++ b/tests/test_nlp.py
@@ -0,0 +1,157 @@
+"""Phase 13 — natural-language prompt parser + run-from-prompt tests."""
+
+from __future__ import annotations
+
+import pytest
+
+from ropeway.multi_rope import RopewaySystemType
+from ropeway.nlp import format_summary, parse_prompt, run_from_prompt
+
+
+# ---------------------------------------------------------------------------
+# Length parsing
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.parametrize("prompt,expected_m", [
+    ("build a 2 km MGD", 2000.0),
+    ("a 2.5km gondola", 2500.0),
+    ("a 2000 m chairlift", 2000.0),
+    ("3000 metres corridor", 3000.0),
+    ("two kilometres", 2000.0),
+    ("three km", 3000.0),
+    ("no length here", None),
+])
+def test_parse_length(prompt, expected_m):
+    req = parse_prompt(prompt)
+    assert req.corridor_length_m == expected_m
+
+
+# ---------------------------------------------------------------------------
+# System parsing
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.parametrize("prompt,expected", [
+    ("MGD", RopewaySystemType.MGD),
+    ("an urban gondola", RopewaySystemType.MGD),
+    ("a jig-back tram", RopewaySystemType.JIG_BACK),
+    ("aerial tram", RopewaySystemType.JIG_BACK),
+    ("3S system", RopewaySystemType.TGD_3S),
+    ("tri-cable installation", RopewaySystemType.TGD_3S),
+    ("funitel for windy site", RopewaySystemType.FUNITEL),
+    ("bi-cable gondola", RopewaySystemType.BGD),
+    ("a six-pack chairlift", RopewaySystemType.CHAIRLIFT),
+    ("detachable chair", RopewaySystemType.CHAIRLIFT),
+])
+def test_parse_system(prompt, expected):
+    assert parse_prompt(prompt).system == expected
+
+
+def test_parse_no_system_returns_none_and_notes_default():
+    req = parse_prompt("a 1500 m corridor")
+    assert req.system is None
+    assert any("MGD" in n for n in req.notes)
+
+
+# ---------------------------------------------------------------------------
+# Coordinates
+# ---------------------------------------------------------------------------
+
+
+def test_parse_two_coordinate_pairs():
+    req = parse_prompt(
+        "ropeway from -68.13,-16.49 to -68.16,-16.51, urban gondola"
+    )
+    assert req.start_lonlat == (-68.13, -16.49)
+    assert req.end_lonlat == (-68.16, -16.51)
+    assert req.is_synthetic() is False
+
+
+def test_parse_coords_space_separated():
+    req = parse_prompt("start 6.870 45.892 end 6.887 45.916 jigback")
+    assert req.start_lonlat == (6.870, 45.892)
+    assert req.end_lonlat == (6.887, 45.916)
+
+
+def test_parse_no_coords_marks_synthetic():
+    req = parse_prompt("a 2 km MGD")
+    assert req.is_synthetic() is True
+
+
+def test_parse_coords_ignores_tail_of_longer_number():
+    req = parse_prompt("offset 1100.5, 45.2 from datum")
+    assert req.start_lonlat is None
+
+
+# ---------------------------------------------------------------------------
+# Optimizer knobs
+# ---------------------------------------------------------------------------
+
+
+def test_parse_generations_seed_and_max_towers():
+    req = parse_prompt("a 2 km MGD with 80 generations, seed 7, up to 12 towers")
+    assert req.generations == 80
+    assert req.seed == 7
+    assert req.max_towers == 12
+
+
+def test_parse_max_towers_dash_form():
+    req = parse_prompt("run with --max-towers 6")
+    assert req.max_towers == 6
+
+
+def test_parse_zero_towers_is_kept_and_summarised():
+    req = parse_prompt("a 2 km MGD with 0 towers")
+    assert req.max_towers == 0
+    assert "max towers: 0" in format_summary(req)
+
+
+# ---------------------------------------------------------------------------
+# Summary
+# ---------------------------------------------------------------------------
+
+
+def test_format_summary_includes_all_known_fields():
+    req = parse_prompt("a 2 km MGD seed 5 with 30 generations and 6 towers")
+    summary = format_summary(req)
+    assert "system: mgd" in summary
+    assert "2000 m" in summary
+    assert "generations: 30" in summary
+    assert "seed: 5" in summary
+    assert "max towers: 6" in summary
+
+
+def test_format_summary_lists_notes():
+    summary = format_summary(parse_prompt("just a corridor"))
+    assert "notes:" in summary
+
+
+# ---------------------------------------------------------------------------
+# run_from_prompt orchestration
+# ---------------------------------------------------------------------------
+
+
+def test_run_from_prompt_returns_metrics_dict():
+    out = run_from_prompt("a 1500 m MGD with 40 generations seed 3")
+    assert out["request"].corridor_length_m == 1500.0
+    assert out["request"].system == RopewaySystemType.MGD
+    assert out["request"].generations == 40
+    assert "feasible" in out
+    assert set(out["metrics"]).issuperset({
+        "intermediate_towers", "cable_length_m",
+        "min_clearance_m", "max_tension_kn",
+    })
+    # Headline metric returned regardless of feasibility (feasibility
+    # depends on synthetic-terrain RNG; the orchestrator promises a
+    # well-formed result dict, not a feasible alignment).
+    assert isinstance(out["metrics"]["cable_length_m"], float)
+
+
+def test_run_from_prompt_defaults_to_3km_synthetic_mgd():
+    out = run_from_prompt("optimize something")
+    # Empty-ish prompt → 3 km synthetic + MGD defaults.
+    assert out["request"].corridor_length_m is None
+    assert out["request"].system is None
+    # The runner picks defaults silently and still returns metrics.
+    assert "cable_length_m" in out["metrics"]