Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -973,7 +973,7 @@ <h2 id="nd-title">…</h2>
let graphRefreshInFlight = false;
const chatState = { conversationId: null, messages: [] };
let nodesSort = { key: null, dir: 'asc' }; // null => server "worst first" default
const SEV_RANK = { critical: 0, error: 0, warning: 1, warn: 1, info: 2, notice: 3 };
const SEV_RANK = { critical: 0, crit: 0, error: 0, warning: 1, warn: 1, info: 2, notice: 3 };
const ASSESSMENT_STATE_CLASS = {
probation: 'good',
relaxing: 'good',
Expand Down Expand Up @@ -1698,16 +1698,6 @@ <h2 id="nd-title">…</h2>
const pillsHost = $('#issues-sev-pills');
issuesBody.innerHTML = '';
pillsHost.innerHTML = '';
// Issue detection is paused pending redesign (#5). The API returns
// `status: "placeholder"` until new rules ship. Render the note
// verbatim so users (and any AI consumer scraping the dashboard)
// don't read an empty list as "all clear".
if (s.issues && s.issues.status === 'placeholder') {
const note = s.issues.note
|| 'Issue detection is paused pending redesign.';
issuesBody.appendChild(el('div', {class: 'muted', style: 'padding: 6px 0;'}, note));
return;
}
const issues = ((s.issues && s.issues.issues) || []).slice();
if (!issues.length) {
issuesBody.appendChild(el('div', {class:'empty'}, 'No active issues.'));
Expand All @@ -1719,10 +1709,10 @@ <h2 id="nd-title">…</h2>
const sev = (i.severity || 'info').toLowerCase();
counts[sev] = (counts[sev] || 0) + 1;
}
const order = ['critical', 'error', 'warning', 'warn', 'info', 'notice'];
const order = ['critical', 'crit', 'error', 'warning', 'warn', 'info', 'notice'];
for (const k of order) {
if (!counts[k]) continue;
const cls = (k === 'critical' || k === 'error') ? 'bad' : (k.startsWith('warn') ? 'warn' : 'muted');
const cls = (k === 'critical' || k === 'crit' || k === 'error') ? 'bad' : (k.startsWith('warn') ? 'warn' : 'muted');
pillsHost.appendChild(el('span', {
class: 'pill ' + cls,
style: 'margin-left:4px;',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -866,20 +866,6 @@ def health_snapshot() -> dict[str, object]:

@app.get("/v1/issues/active")
def list_active_issues() -> dict[str, object]:
# Issue detection is paused pending redesign — see tracking
# issue #5 and placeholder issue #4. We deliberately return an
# empty list with an explicit ``status: "placeholder"`` so
# consumers (dashboard, MCP, AI reasoners) don't misread the
# absence of issues as "all clear".
from ..pipeline.reasoner import ISSUES_PAUSED, ISSUES_PAUSED_NOTE
if ISSUES_PAUSED:
return {
"count": 0,
"issues": [],
"status": "placeholder",
"note": ISSUES_PAUSED_NOTE,
"computed_at": _utc_now(),
}
try:
issues = get_store().list_active_issues()
return {"count": len(issues), "issues": issues, "computed_at": _utc_now()}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -146,11 +146,12 @@ class ToolCallRequest(BaseModel):
{
"name": "list_active_issues",
"description": (
"Return all currently-open Thread network issues. "
"NOTE: Issue detection is currently paused pending a redesign of the rule set "
"(see tracking issue #5). Until new rules ship, this tool returns an empty list "
"with `status: \"placeholder\"`. Do NOT infer \"all clear\" from the empty list — "
"instead, reason from the raw data (topology, partitions, links, nodes)."
"Return all currently-open Thread network issues computed by deterministic rules. "
"Each issue includes the affected EUI64 (or null for mesh-wide issues), "
"`first_seen_at`, `last_seen_at`, a severity that reflects actionability × freshness, "
"and an evidence payload that includes the EUIs involved and the observation that triggered it. "
"Current rule taxonomy: "
"`real_partition_split`, `dead_link_reference`, `route_to_otbr_unreachable`."
),
"inputSchema": {"type": "object", "properties": {}, "required": []},
}, {
Expand Down Expand Up @@ -1031,17 +1032,6 @@ async def _dispatch_tool(name: str, arguments: dict[str, Any]) -> dict[str, Any]
except Exception as exc: # noqa: BLE001
return {"error": str(exc)}
if name == "list_active_issues":
# Mirrors /v1/issues/active. Issue detection is paused
# pending redesign (#5); return an explicit placeholder so AI
# consumers don't infer "all clear" from an empty list.
from ..pipeline.reasoner import ISSUES_PAUSED, ISSUES_PAUSED_NOTE
if ISSUES_PAUSED:
return {
"count": 0,
"issues": [],
"status": "placeholder",
"note": ISSUES_PAUSED_NOTE,
}
try:
issues = get_store().list_active_issues()
return {"count": len(issues), "issues": issues}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,9 @@ def _evidence_implicates_eui(evidence: Any, eui64: str) -> bool:
members = evidence.get("members")
if isinstance(members, list) and eui64 in members:
return True
involved = evidence.get("involved_eui64s")
if isinstance(involved, list) and eui64 in involved:
return True
# Nested partitions[].members (partition_split shape).
partitions = evidence.get("partitions")
if isinstance(partitions, list):
Expand All @@ -78,6 +81,14 @@ def _evidence_implicates_eui(evidence: Any, eui64: str) -> bool:
part_members = part.get("members")
if isinstance(part_members, list) and eui64 in part_members:
return True
sample = part.get("members_sample")
if isinstance(sample, list) and eui64 in sample:
return True
recent_changes = evidence.get("recent_partition_changes")
if isinstance(recent_changes, list):
for row in recent_changes:
if isinstance(row, dict) and row.get("eui64") == eui64:
return True
return False


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1044,24 +1044,6 @@ async def _persist_matter_diagnostics(
"""
rich = _LAST_MATTER_RICH_INFO
if not rich:
# v0.9.43: even with no rich info this cycle (matter-server WS hiccup
# or a single empty poll), we MUST still reconcile the
# ``partition_split`` issue. Otherwise an issue opened on a prior
# cycle becomes immortal — it never sees a non-split observation
# again because the empty-rich early-return below would skip the
# close branch. Latent bug observed live as issue #54 hanging open
# after the partition had long since healed.
try:
active = [
i for i in s.list_active_issues()
if i.get("kind") == "partition_split"
]
for issue in active:
s.close_issue(int(issue["id"]))
except Exception as exc: # noqa: BLE001
log.warning(
"partition_split close-on-empty failed: %s", exc,
)
return {
"nodes_with_diagnostics": 0,
"links_recorded": 0,
Expand Down Expand Up @@ -1590,33 +1572,6 @@ async def _persist_matter_diagnostics(
for pid, members in sorted(live_partitions.items())
]

# Open/close partition_split issue (now reasoning over live partitions only).
try:
active = [i for i in s.list_active_issues() if i.get("kind") == "partition_split"]
if split:
distinct_epids = sorted({
p["extended_pan_id"] for p in partition_summary
if p.get("extended_pan_id")
})
s.open_issue(
kind="partition_split",
severity="warning",
evidence={
"partitions": partition_summary,
"partition_count": len(live_partitions),
# If partitions report different extended_pan_ids,
# this is a credentials-mismatch (stale dataset on
# one device) not an RF-fragmentation issue.
"distinct_extended_pan_ids": distinct_epids,
"credentials_mismatch_suspected": len(distinct_epids) > 1,
},
)
else:
for issue in active:
s.close_issue(int(issue["id"]))
except Exception as exc: # noqa: BLE001
log.warning("Failed to update partition_split issue: %s", exc)

log.info(
"diagnostics persisted: nodes=%d links=%d partitions=%d split=%s "
"changes=%d phantoms_marked=%d phantoms_cleared=%d excluded_partitions=%d "
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,57 @@
],
"references": []
},
{
"id": "real_partition_split",
"title": "Real partition split (evidence-backed)",
"applies_to": ["real_partition_split"],
"summary": "Multiple live partitions plus evidence of a device transitioning between partitions. This narrows diagnosis vs. a bare partition-count: it suggests an unstable boundary device or RF gap rather than stale IDs.",
"evidence_to_collect": [
"Issue evidence.partitions and evidence.recent_partition_changes",
"get_mesh_state to confirm current partition membership",
"diff_topology across the split window (if snapshots exist)"
],
"remediation_steps": [
"Identify the device(s) in evidence.recent_partition_changes; check their placement and power stability.",
"Bridge the RF gap between partitions (add/relocate a router) and re-run ingest.",
"If partitions disagree on extended_pan_id, resolve credentials drift (re-commission the minority leader)."
],
"references": []
},
{
"id": "dead_link_reference",
"title": "Dead-link reference (unknown neighbor EUI64)",
"applies_to": ["dead_link_reference"],
"summary": "A router references an EUI64 that is not present in the registry-backed nodes table, persisted across multiple ingestion ticks. This usually indicates a recommissioned device leaving stale neighbor/route cache entries.",
"evidence_to_collect": [
"Issue evidence.references[] (reporter, unknown neighbor, source, seen_count)",
"get_neighbors for the reporter router to see link_established/allocated state",
"list_all_nodes to confirm the neighbor truly is absent"
],
"remediation_steps": [
"If the neighbor is a known physical device, re-commission it so HA registry matches the mesh identity.",
"Restart the reporter router to flush stale neighbor cache entries.",
"If references persist and the neighbor is truly gone, inspect for duplicate identities or a stuck child table."
],
"references": []
},
{
"id": "route_to_otbr_unreachable",
"title": "Route to OTBR unreachable (loop/unknown next hop)",
"applies_to": ["route_to_otbr_unreachable"],
"summary": "Walking the next-hop chain from a router to the OTBR terminates in a loop, unknown next hop, or missing route-table entry. This points at routing-table corruption, partition mismatch, or a missing upstream neighbor relationship.",
"evidence_to_collect": [
"Issue evidence.route_walk (hops + issues)",
"get_mesh_state to confirm partitions and current links",
"get_neighbors for the router and its first upstream hop"
],
"remediation_steps": [
"If the route walk reports different_partition, resolve the partition split first.",
"Restart the affected router to repopulate its route table.",
"If unknown_next_hop persists, inspect the router's neighbor table for missing upstream links and RF quality."
],
"references": []
},
{
"id": "sed_battery_drain",
"title": "Sleepy-end-device unexpected battery drain",
Expand Down
Loading