Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions src/ropeway/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,31 @@ def validate():
click.secho(f"Overall: {'PASS' if rep.passed else 'FAIL'}", fg=color)


@main.command()
@click.argument("prompt", nargs=-1, required=True)
def ask(prompt):
    """Phase 13: parse a natural-language ropeway request and run it.

    Example: ``ropeway ask "build a 2 km MGD with seed 7"``
    """
    # Imported lazily so the NLP module is only loaded when this command runs.
    from .nlp import format_summary, parse_prompt, run_from_prompt

    # click delivers the variadic argument as a tuple of words; rebuild the
    # sentence the user typed.
    request_text = " ".join(prompt)
    request = parse_prompt(request_text)

    # Echo what was understood before starting the optimizer run.
    click.echo("Parsed request:")
    for summary_line in format_summary(request).splitlines():
        click.echo(f" {summary_line}")
    click.echo()

    click.echo("Running optimizer on synthetic terrain...")
    outcome = run_from_prompt(request_text)

    # Green for a feasible design, red otherwise.
    feasible = outcome["feasible"]
    click.secho(
        f"Feasible: {feasible}",
        fg="green" if feasible else "red",
    )
    for metric_name, metric_value in outcome["metrics"].items():
        click.echo(f" {metric_name}: {metric_value}")


@main.command(name="rl-train")
@click.option("--length", "length_m", type=float, default=3000.0)
@click.option("--dem", "dem_path", type=click.Path(exists=True, path_type=Path), default=None)
Expand Down
286 changes: 286 additions & 0 deletions src/ropeway/nlp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,286 @@
"""Phase 13 — natural-language prompt → optimizer run.

A deterministic, dependency-free rule-based parser that extracts the
shape of a ropeway request from a plain-English prompt and runs the
optimizer with it. Designed so the CI path needs no API key and no
network — the LLM is *optional*: this module implements only the
rule-based parse, and ``llm_available()`` reports whether
``ANTHROPIC_API_KEY`` is set so callers can gate their own LLM-backed
fallback for prompts the rules cannot pin down.

Patterns the parser recognises
------------------------------
* **System type** — ``mgd``, ``jigback``/``jig-back``, ``bgd``, ``3s``,
``funitel``, ``chair``/``chairlift``. Also accepts common English
paraphrases (``urban gondola`` → MGD, ``aerial tram`` → jigback,
``tri-cable`` → 3S, ``six-pack`` → chairlift).
* **Corridor length** — ``2 km``, ``2000 m``, ``2.5km``, ``three
kilometres``, ``5000 metres``.
* **Coordinates** — any pair of ``(lon, lat)`` numbers in plausible
WGS84 ranges, separated by comma or space. Two such pairs in a prompt
become start + end.
* **Generations** — ``80 generations``, ``gen 60``, ``--generations 80``.
* **Seed** — ``seed 42``, ``--seed 7``.
* **Max towers** — ``up to 10 towers``, ``max-towers 8``, ``8 towers``.

When the corridor or the system type cannot be extracted and a default
is used instead, a note is added to ``ParsedRequest.notes`` so the
caller can show the user what was assumed.
"""

from __future__ import annotations

import os
import re
from dataclasses import dataclass, field
from typing import Optional

from .multi_rope import RopewaySystemType


@dataclass
class ParsedRequest:
    """Fields extracted from a plain-English ropeway request.

    Every field defaults to ``None`` (``notes`` to a fresh empty list), so
    a newly constructed instance means "nothing recognised yet".
    """
    # Corridor length in metres.
    corridor_length_m: Optional[float] = None
    # Ropeway system type named in the prompt.
    system: Optional[RopewaySystemType] = None
    # First and second (lon, lat) pairs found in the prompt.
    start_lonlat: Optional[tuple[float, float]] = None
    end_lonlat: Optional[tuple[float, float]] = None
    # Optimizer knobs mentioned in the prompt.
    generations: Optional[int] = None
    seed: Optional[int] = None
    max_towers: Optional[int] = None
    # Human-readable remarks about the parse (e.g. which defaults apply).
    notes: list[str] = field(default_factory=list)

    def is_synthetic(self) -> bool:
        """Return True unless both terminal coordinates are present.

        A request missing either end point has no real corridor to follow
        and is run on synthetic terrain.
        """
        has_both_terminals = (
            self.start_lonlat is not None and self.end_lonlat is not None
        )
        return not has_both_terminals


_WORD_NUMS = {
"one": 1, "two": 2, "three": 3, "four": 4, "five": 5,
"six": 6, "seven": 7, "eight": 8, "nine": 9, "ten": 10,
}

# Lookup table used by ``_parse_system``: each ropeway system type maps to
# the phrases that name it. Aliases are written in lowercase because the
# prompt is lowercased before matching. When several aliases match a
# prompt, longer ones take precedence, so the bare word "gondola" only
# decides the type when nothing longer matched.
_SYSTEM_ALIASES = {
    # canonical type → list of recognised aliases (lowercased, word-boundary matched)
    RopewaySystemType.MGD: [
        "mgd", "monocable detachable gondola", "monocable gondola",
        "urban gondola", "gondola",
    ],
    RopewaySystemType.JIG_BACK: [
        "jigback", "jig-back", "jig back", "aerial tram", "tram",
        "pendelbahn",
    ],
    RopewaySystemType.BGD: [
        "bgd", "bi-cable", "bicable", "bi-cable gondola",
    ],
    RopewaySystemType.TGD_3S: [
        "3s", "tri-cable", "tricable", "3-s",
    ],
    RopewaySystemType.FUNITEL: [
        "funitel",
    ],
    RopewaySystemType.CHAIRLIFT: [
        "chair", "chairlift", "chair-lift", "six-pack", "six pack",
        "detachable chair",
    ],
}


def _parse_length_to_m(text: str) -> Optional[float]:
"""Find a corridor length token like '2 km' or '2000 m' in the text."""
# numeric form: 2 km, 2.5km, 2000 m, 5000 metres
m = re.search(
r"(?<![\w-])(\d+(?:\.\d+)?)\s*(km|kilometres?|kilometers?|m|metres?|meters?)\b",
text, re.IGNORECASE,
)
if m:
value = float(m.group(1))
unit = m.group(2).lower()
return value * 1000.0 if unit.startswith(("k",)) else value
# word form: 'two kilometres', 'three km'
m = re.search(
r"\b(" + "|".join(_WORD_NUMS) + r")\s+(km|kilometres?|kilometers?|metres?|meters?)\b",
text, re.IGNORECASE,
)
if m:
value = float(_WORD_NUMS[m.group(1).lower()])
unit = m.group(2).lower()
return value * 1000.0 if unit.startswith("k") else value
return None


def _parse_system(text: str) -> Optional[RopewaySystemType]:
    """Return the ropeway system type named in ``text``, or ``None``.

    Every alias in ``_SYSTEM_ALIASES`` is searched for in the lowercased
    text as a whole token, i.e. not embedded in a longer word or a
    hyphenated compound. When several aliases match, the longest matched
    phrase wins; ties go to the alias listed first in the table.

    A specific alias directly followed by the word "gondola" is scored as
    a single phrase. Without this, short aliases lose to the longer
    generic alias "gondola", and prompts such as "3S gondola" or
    "funitel gondola" would resolve to MGD.
    """
    lower = text.lower()
    scored: list[tuple[int, RopewaySystemType]] = []
    for sys_type, aliases in _SYSTEM_ALIASES.items():
        for alias in aliases:
            # The optional suffix folds "<alias> gondola" / "<alias>-gondola"
            # into one match so its full length counts towards the ranking.
            pattern = (
                r"(?<![\w-])" + re.escape(alias)
                + r"(?:[\s-]+gondola)?(?![\w-])"
            )
            spans = [len(hit.group(0)) for hit in re.finditer(pattern, lower)]
            if spans:
                scored.append((max(spans), sys_type))
    if not scored:
        return None
    # max() returns the first of several equally long matches, which keeps
    # table order as the tie-breaker.
    return max(scored, key=lambda entry: entry[0])[1]


def _parse_coords(text: str) -> tuple[Optional[tuple[float, float]],
Optional[tuple[float, float]]]:
"""Find up to two (lon, lat) pairs. Accepts ``lon,lat`` or ``lon lat``."""
# Match a lon,lat pair: lon ∈ [-180, 180], lat ∈ [-90, 90].
coord_re = re.compile(
r"(-?\d{1,3}\.\d+)[,\s]+(-?\d{1,3}\.\d+)"
)
pairs: list[tuple[float, float]] = []
for m in coord_re.finditer(text):
lon, lat = float(m.group(1)), float(m.group(2))
if -180.0 <= lon <= 180.0 and -90.0 <= lat <= 90.0:
pairs.append((lon, lat))
if len(pairs) >= 2:
break
if len(pairs) == 0:
return None, None
if len(pairs) == 1:
return pairs[0], None
return pairs[0], pairs[1]


def _parse_int(text: str, *keywords: str) -> Optional[int]:
"""Find an integer associated with one of ``keywords``.

Two patterns are recognised, with **num-before-keyword preferred**
so prompts like ``"80 generations, seed 7"`` don't bleed the seed
value into the generations field via a greedy post-keyword match.
"""
for kw in keywords:
# Prefer '<int> <keyword>' (e.g. '8 towers', '80 generations').
m = re.search(rf"\b(\d+)\s+{re.escape(kw)}\b", text, re.IGNORECASE)
if m:
return int(m.group(1))
for kw in keywords:
# Fallback: '<keyword> <int>' (e.g. 'seed 42', '--seed 42'),
# constrained to a short separator so we don't swallow far-away
# numbers belonging to a different field.
m = re.search(rf"\b{re.escape(kw)}\b[\s:=]{{,3}}(-?\d+)\b",
text, re.IGNORECASE)
if m:
return int(m.group(1))
return None


def parse_prompt(prompt: str) -> ParsedRequest:
    """Turn a plain-English ropeway request into a ``ParsedRequest``.

    Each field is extracted independently by its own helper. An empty or
    whitespace-only prompt yields an otherwise blank request carrying the
    note "empty prompt". When neither a corridor length nor a start
    coordinate is found, or no system type is recognised, a note naming
    the default that will apply is appended.
    """
    if not (prompt and prompt.strip()):
        return ParsedRequest(notes=["empty prompt"])

    start, end = _parse_coords(prompt)
    parsed = ParsedRequest(
        corridor_length_m=_parse_length_to_m(prompt),
        system=_parse_system(prompt),
        start_lonlat=start,
        end_lonlat=end,
        generations=_parse_int(prompt, "generations", "gens", "gen"),
        seed=_parse_int(prompt, "seed"),
        # Tower keyword first, then '--max-towers'.
        max_towers=_parse_int(prompt, "towers", "max-towers"),
    )

    corridor_unknown = (
        parsed.corridor_length_m is None and parsed.start_lonlat is None
    )
    if corridor_unknown:
        parsed.notes.append(
            "no corridor length or coordinates found — defaulting to a "
            "3000 m synthetic corridor"
        )
    if parsed.system is None:
        parsed.notes.append("no system type recognised — defaulting to MGD")
    return parsed


def format_summary(req: ParsedRequest) -> str:
    """Render a parsed request as a ``" | "``-separated summary line.

    The line always states the system and the corridor (falling back to
    the default labels when they are missing). Generations, seed and max
    towers are appended when set; a seed of 0 is shown, whereas a zero
    generations or max-towers value is omitted. Any notes follow on a
    second line.
    """
    system_label = "mgd (default)" if not req.system else req.system.value

    start, end = req.start_lonlat, req.end_lonlat
    if start and end:
        corridor_label = (
            f"corridor: ({start[0]:.5f}, {start[1]:.5f}) → "
            f"({end[0]:.5f}, {end[1]:.5f})"
        )
    elif req.corridor_length_m:
        corridor_label = f"corridor: {req.corridor_length_m:.0f} m synthetic"
    else:
        corridor_label = "corridor: 3000 m synthetic (default)"

    # (label, value, include?) — the seed is tested against None so that
    # an explicit seed of 0 still appears.
    optional_fields = (
        ("generations", req.generations, bool(req.generations)),
        ("seed", req.seed, req.seed is not None),
        ("max towers", req.max_towers, bool(req.max_towers)),
    )
    segments = [f"system: {system_label}", corridor_label]
    segments.extend(
        f"{label}: {value}"
        for label, value, include in optional_fields
        if include
    )

    rendered = " | ".join(segments)
    if not req.notes:
        return rendered
    return rendered + "\n notes: " + "; ".join(req.notes)


def run_from_prompt(prompt: str) -> dict:
    """Parse ``prompt`` and run the optimizer with the resulting settings.

    The run always uses a synthetic terrain profile: parsed coordinates
    are reported in the summary but are not used to build terrain here.
    Missing settings fall back to MGD, a 3000 m corridor, seed 42,
    8 intermediate towers and 60 generations; the population size is
    fixed at 80.

    Returns a dict with the keys ``"request"`` (the ``ParsedRequest``),
    ``"summary"`` (its formatted text), ``"feasible"`` and ``"metrics"``
    (headline figures from the best result).
    """
    # Imported here rather than at module level so that callers who only
    # need `parse_prompt` do not load the optimizer stack.
    from .alignment import evaluate_alignment  # noqa: F401 - sanity
    from .dem import synthetic_profile
    from .multi_rope import system_defaults
    from .optimizer import GAConfig, optimize

    request = parse_prompt(prompt)

    system_cfg = system_defaults(request.system or RopewaySystemType.MGD)
    # One seed drives both the terrain generator and the optimizer.
    rng_seed = 42 if request.seed is None else request.seed
    terrain = synthetic_profile(
        length_m=request.corridor_length_m or 3000.0,
        seed=rng_seed,
    )
    ga_settings = GAConfig(
        max_intermediate_towers=request.max_towers or 8,
        generations=request.generations or 60,
        population_size=80,
        seed=rng_seed,
    )

    outcome = optimize(
        terrain.as_function(),
        terrain.total_length,
        cfg=system_cfg,
        ga=ga_settings,
        verbose=False,
    )
    best = outcome.best_result
    report = best.report
    alignment = outcome.best_alignment

    return {
        "request": request,
        "summary": format_summary(request),
        "feasible": best.feasible,
        "metrics": {
            # Two is subtracted from the tower count (presumably the two
            # terminals — confirm against the alignment model), floored at 0.
            "intermediate_towers": max(0, len(alignment.towers) - 2),
            "cable_length_m": report.total_cable_length_m,
            "min_clearance_m": report.min_clearance_m,
            # The report field is in newtons; expose kilonewtons.
            "max_tension_kn": report.max_tension_n / 1e3,
            "max_break_over_deg": report.max_break_over_deg,
            "cost": best.cost,
        },
    }


def llm_available() -> bool:
    """Report whether ``ANTHROPIC_API_KEY`` is set to a non-empty value.

    Callers can use this to decide whether an optional LLM-backed
    fallback may be attempted; the rule-based ``parse_prompt`` never
    depends on it. An unset or empty variable yields ``False``.
    """
    api_key = os.getenv("ANTHROPIC_API_KEY")
    if api_key:
        return True
    return False


# Names exported by a star-import of this module. The underscore-prefixed
# parsing helpers are not listed.
__all__ = [
    "ParsedRequest",
    "parse_prompt",
    "format_summary",
    "run_from_prompt",
    "llm_available",
]
Loading
Loading