Skip to content

Commit ecfde21

Browse files
committed
fix(robot): correct type-hint handling in semantic analyzer
- Strip variable type hints only in declaration contexts.
- Preserve typed names in usage lookups.
- Resolve nested typed variable declarations correctly.
- Stop splitting type hints on additional colons in the tokenizer.
- Add regression tests for variables, VAR, assignments, and arguments.
1 parent 23a538c commit ecfde21

6 files changed

Lines changed: 312 additions & 83 deletions

File tree

packages/robot/src/robotcode/robot/diagnostics/semantic_analyzer/analyzer.py

Lines changed: 37 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -354,8 +354,7 @@ def _visit_Variable(self, node: Variable) -> None: # noqa: N802
354354

355355
# Resolve nested variable references inside the variable name
356356
# so that e.g. ${a} in ${INVALID VAR ${a}} gets hover/go-to-definition.
357-
inner_token = Token(Token.ARGUMENT, matcher.base, name_token.lineno, name_token.col_offset + 2)
358-
for var_token, var in self._iter_variables_from_occurrences(inner_token):
357+
for var_token, var in self._iter_nested_variables_from_declaration_token(name_token):
359358
self._handle_find_variable_result(var_token, var)
360359

361360
resolved = self._try_resolve_nested_variable_base(matcher.identifier, matcher.base, name_token)
@@ -542,8 +541,7 @@ def visit_Var(self, node: Var) -> None: # noqa: N802
542541
)
543542
return
544543

545-
inner_token = Token(Token.ARGUMENT, matcher.base, name_token.lineno, name_token.col_offset + 2)
546-
for var_token, var in self._iter_variables_from_occurrences(inner_token):
544+
for var_token, var in self._iter_nested_variables_from_declaration_token(name_token):
547545
self._handle_find_variable_result(var_token, var)
548546

549547
resolved = self._try_resolve_nested_variable_base(matcher.identifier, matcher.base, name_token)
@@ -846,8 +844,14 @@ def visit(self, node: ast.AST) -> None:
846844
finally:
847845
self._node_stack.pop()
848846

849-
def _analyze_token_variables(self, token: Token, severity: DiagnosticSeverity = DiagnosticSeverity.ERROR) -> None:
850-
for var_token, var in self._iter_variables_from_occurrences(token):
847+
def _analyze_token_variables(
848+
self,
849+
token: Token,
850+
severity: DiagnosticSeverity = DiagnosticSeverity.ERROR,
851+
*,
852+
parse_type: bool = False,
853+
) -> None:
854+
for var_token, var in self._iter_variables_from_occurrences(token, parse_type=parse_type):
851855
self._handle_find_variable_result(var_token, var, severity)
852856

853857
def _handle_find_variable_result(
@@ -1820,8 +1824,14 @@ def _analyze_assign_statement(self, node: Statement) -> None:
18201824
if empty_var in matcher.base:
18211825
return
18221826

1823-
inner_token = Token(Token.ARGUMENT, matcher.base, assign_token.lineno, assign_token.col_offset + 2)
1824-
for var_token, var in self._iter_variables_from_occurrences(inner_token):
1827+
assign_name_token = Token(
1828+
Token.VARIABLE,
1829+
assign_token.value[:-1].rstrip() if assign_token.value.endswith("=") else assign_token.value,
1830+
assign_token.lineno,
1831+
assign_token.col_offset,
1832+
assign_token.error,
1833+
)
1834+
for var_token, var in self._iter_nested_variables_from_declaration_token(assign_name_token):
18251835
self._handle_find_variable_result(var_token, var)
18261836

18271837
resolved = self._try_resolve_nested_variable_base(matcher.identifier, matcher.base, assign_token)
@@ -2721,11 +2731,16 @@ def _resolve_string_expression(self, raw_str: str, depth: int) -> Union[str, Lit
27212731

27222732
# --- Variable token iteration ---
27232733

2724-
def _iter_variables_from_occurrences(self, token: Token) -> Iterator[Tuple[Token, VariableDefinition]]:
2725-
for occurrence in self._iter_variable_occurrences(token):
2734+
def _iter_variables_from_occurrences(
2735+
self,
2736+
token: Token,
2737+
*,
2738+
parse_type: bool = False,
2739+
) -> Iterator[Tuple[Token, VariableDefinition]]:
2740+
for occurrence in self._iter_variable_occurrences(token, parse_type=parse_type):
27262741
yield from self._resolve_variable_occurrence(occurrence)
27272742

2728-
def _iter_variable_occurrences(self, token: Token) -> Iterator[VariableOccurrence]:
2743+
def _iter_variable_occurrences(self, token: Token, *, parse_type: bool = False) -> Iterator[VariableOccurrence]:
27292744
def exception_handler(e: BaseException, t: Token) -> None:
27302745
self._append_diagnostics(
27312746
range_from_token(t),
@@ -2737,11 +2752,22 @@ def exception_handler(e: BaseException, t: Token) -> None:
27372752
yield from iter_variable_occurrences_from_token(
27382753
token,
27392754
identifiers="$@&%",
2755+
parse_type=parse_type,
27402756
ignore_errors=True,
27412757
extra_types=None,
27422758
exception_handler=exception_handler,
27432759
)
27442760

2761+
def _iter_nested_variables_from_declaration_token(self, token: Token) -> Iterator[Tuple[Token, VariableDefinition]]:
2762+
skipped_root = False
2763+
2764+
for occurrence in self._iter_variable_occurrences(token, parse_type=True):
2765+
if not skipped_root:
2766+
skipped_root = True
2767+
continue
2768+
2769+
yield from self._resolve_variable_occurrence(occurrence)
2770+
27452771
def _resolve_variable_occurrence(
27462772
self,
27472773
occurrence: VariableOccurrence,

packages/robot/src/robotcode/robot/diagnostics/semantic_analyzer/variable_tokenizer.py

Lines changed: 55 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -50,15 +50,17 @@ class VariableOccurrence:
5050
strip_for_reference: bool = True
5151

5252

53-
def build_variable_occurrence(value: str, line: int, col_offset: int) -> VariableOccurrence:
53+
def build_variable_occurrence(
54+
value: str, line: int, col_offset: int, *, parse_type: bool = False
55+
) -> VariableOccurrence:
5456
"""Parse a single variable expression once and return shared occurrence data."""
5557
sub_tokens = build_variable_sub_tokens(value, line, col_offset)
5658
return VariableOccurrence(
5759
value=value,
5860
line=line,
5961
col_offset=col_offset,
6062
length=len(value),
61-
lookup_name=normalize_variable_lookup_name(value),
63+
lookup_name=normalize_variable_lookup_name(value, parse_type=parse_type),
6264
semantic_sub_tokens=sub_tokens if sub_tokens else None,
6365
)
6466

@@ -157,6 +159,7 @@ def iter_variable_occurrences_from_token(
157159
token: Token,
158160
identifiers: str = "$@&%",
159161
*,
162+
parse_type: bool = False,
160163
ignore_errors: bool = False,
161164
extra_types: Optional[Set[str]] = None,
162165
exception_handler: Optional[Callable[[Exception, Token], None]] = None,
@@ -168,7 +171,7 @@ def iter_variable_occurrences_from_token(
168171
"""
169172
parsed_token = token
170173
if token.type == Token.VARIABLE and token.value.endswith("="):
171-
match = search_variable(token.value, ignore_errors=True)
174+
match = search_variable(token.value, ignore_errors=True, parse_type=parse_type)
172175
if not match.is_assign(allow_assign_mark=True):
173176
return
174177

@@ -190,18 +193,24 @@ def iter_variable_occurrences_from_token(
190193
if sub_token.type != Token.VARIABLE:
191194
continue
192195

193-
occurrence = build_variable_occurrence(sub_token.value, sub_token.lineno, sub_token.col_offset)
196+
occurrence = build_variable_occurrence(
197+
sub_token.value,
198+
sub_token.lineno,
199+
sub_token.col_offset,
200+
parse_type=parse_type,
201+
)
194202
yield from iter_related_occurrences(occurrence)
195203

196204

197-
def normalize_variable_lookup_name(value: str) -> Optional[str]:
205+
def normalize_variable_lookup_name(value: str, *, parse_type: bool = False) -> Optional[str]:
198206
"""Normalize a variable expression to a lookup name for static resolution.
199207
200208
Examples:
201209
- `${obj.attr}` -> `${obj}`
202210
- `${var}[0][x]` -> `${var}`
203211
- `%{HOME=default}` -> `%{HOME}`
204212
- `${{expr}}` -> None
213+
- `${age: int}` -> `${age}` only when ``parse_type=True`` (declaration context)
205214
"""
206215
if not value or len(value) < 3:
207216
return None
@@ -238,28 +247,41 @@ def normalize_variable_lookup_name(value: str) -> Optional[str]:
238247
if not inner:
239248
return None
240249

241-
# Try extended syntax first: extract the base variable name before any
242-
# operator/expression. This must happen *before* the nested-variable
243-
# guard because the tail may contain nested variables (e.g.
244-
# ``${A + '${B}'}``) while the base name ``A`` is perfectly resolvable.
245-
# Skip when the extension starts with a variable identifier (``${``,
246-
# ``@{`` etc.) — that indicates a **nested variable name** like
247-
# ``${cfg_${env}}``, not an expression.
248-
extended_match = _MATCH_EXTENDED.match(inner)
249-
if extended_match:
250-
ext_part = extended_match.group(2)
251-
if not ext_part.startswith(("${", "@{", "&{", "%{")):
252-
inner = extended_match.group(1)
250+
# Type hint check must precede extended-syntax matching: `: ` in `${age: int}` would
251+
# otherwise be consumed by _MATCH_EXTENDED (which accepts any [^\s\w] operator) and
252+
# silently strip the type even in reference contexts. RF itself only strips the type
253+
# hint when search_variable() is called with parse_type=True — i.e. in declaration
254+
# contexts (Variables section, [Arguments], VAR, FOR, Assignment).
255+
if prefix == "$" and ": " in inner:
256+
if parse_type:
257+
inner = inner.split(": ", 1)[0]
258+
# else: keep inner with type hint intact; the full `${age: int}` is the lookup name.
259+
else:
260+
# Try extended syntax first: extract the base variable name before any
261+
# operator/expression. This must happen *before* the nested-variable
262+
# guard because the tail may contain nested variables (e.g.
263+
# ``${A + '${B}'}``) while the base name ``A`` is perfectly resolvable.
264+
# Skip when the extension starts with a variable identifier (``${``,
265+
# ``@{`` etc.) — that indicates a **nested variable name** like
266+
# ``${cfg_${env}}``, not an expression.
267+
extended_match = _MATCH_EXTENDED.match(inner)
268+
if extended_match:
269+
ext_part = extended_match.group(2)
270+
if not ext_part.startswith(("${", "@{", "&{", "%{")):
271+
inner = extended_match.group(1)
253272

254273
if "${" in inner or "@{" in inner or "&{" in inner or "%{" in inner:
255274
return None
256275

257276
if prefix == "%" and "=" in inner:
258277
inner = inner.split("=", 1)[0]
259-
elif prefix == "$" and ": " in inner:
260-
inner = inner.split(": ", 1)[0]
261-
elif prefix == "$" and ":" in inner:
262-
inner = inner.split(":", 1)[0]
278+
elif prefix == "$" and ":" in inner and ": " not in inner:
279+
# Bare colon: embedded argument pattern ${arg:\d+}.
280+
# Type hints (`: ` with space) are handled above, gated by parse_type.
281+
# Preserve builtin ${:}; only treat ':' as pattern separator when both sides exist.
282+
head, _, tail = inner.partition(":")
283+
if head and tail:
284+
inner = head
263285

264286
inner = inner.strip()
265287
if not inner:
@@ -491,7 +513,9 @@ def _decompose_variable_inner(
491513
if "${" in inner or "@{" in inner or "&{" in inner or "%{" in inner:
492514
return _decompose_nested_variable(inner, line, col_offset)
493515

494-
# Check for type hint: ${age: int} or ${name: str:\w+}
516+
# Check for type hint: ${age: int}
517+
# RF uses ': ' (colon + space) as the type separator.
518+
# Everything after ': ' is the type hint — no further splitting.
495519
if ": " in inner and prefix_char == "$":
496520
colon_pos = inner.index(": ")
497521
base = inner[:colon_pos]
@@ -516,48 +540,15 @@ def _decompose_variable_inner(
516540
)
517541
)
518542

519-
# Check for pattern after type: ${name: str:\w+}
520-
if ":" in rest:
521-
pattern_sep = rest.index(":")
522-
type_hint = rest[:pattern_sep]
523-
pattern = rest[pattern_sep + 1 :]
524-
tokens.append(
525-
SemanticToken(
526-
kind=TokenKind.VARIABLE_TYPE_HINT,
527-
value=type_hint,
528-
line=line,
529-
col_offset=col_offset + colon_pos + 2,
530-
length=len(type_hint),
531-
)
532-
)
533-
tokens.append(
534-
SemanticToken(
535-
kind=TokenKind.VARIABLE_PATTERN_SEPARATOR,
536-
value=":",
537-
line=line,
538-
col_offset=col_offset + colon_pos + 2 + len(type_hint),
539-
length=1,
540-
)
541-
)
542-
tokens.append(
543-
SemanticToken(
544-
kind=TokenKind.VARIABLE_PATTERN,
545-
value=pattern,
546-
line=line,
547-
col_offset=col_offset + colon_pos + 2 + len(type_hint) + 1,
548-
length=len(pattern),
549-
)
550-
)
551-
else:
552-
tokens.append(
553-
SemanticToken(
554-
kind=TokenKind.VARIABLE_TYPE_HINT,
555-
value=rest,
556-
line=line,
557-
col_offset=col_offset + colon_pos + 2,
558-
length=len(rest),
559-
)
543+
tokens.append(
544+
SemanticToken(
545+
kind=TokenKind.VARIABLE_TYPE_HINT,
546+
value=rest,
547+
line=line,
548+
col_offset=col_offset + colon_pos + 2,
549+
length=len(rest),
560550
)
551+
)
561552
return tokens
562553

563554
# Check for embedded pattern without type: ${arg:\d+}
@@ -855,7 +846,7 @@ def _iter_related_occurrences_from_token(
855846
line=token.line,
856847
col_offset=token.col_offset,
857848
length=token.length,
858-
lookup_name=normalize_variable_lookup_name(token.value),
849+
lookup_name=normalize_variable_lookup_name(token.value, parse_type=False),
859850
semantic_sub_tokens=token.sub_tokens if token.sub_tokens else None,
860851
)
861852
yield nested

tests/robotcode/robot/diagnostics/test_semantic_analyzer/test_analyzer.py

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from typing import List
1414
from unittest.mock import MagicMock
1515

16+
import pytest
1617
from robot.api import get_model
1718

1819
from robotcode.robot.diagnostics.analyzer_result import AnalyzerResult
@@ -32,6 +33,7 @@
3233
WhileStatement,
3334
)
3435
from robotcode.robot.diagnostics.variable_scope import VariableScope
36+
from robotcode.robot.utils import RF_VERSION
3537

3638

3739
def _parse(text: str) -> AST:
@@ -648,6 +650,82 @@ def test_statement_count_reasonable(self) -> None:
648650
assert len(result.semantic_model.statements) >= 4
649651

650652

653+
@pytest.mark.skipif(RF_VERSION < (7, 3), reason="Argument type hints require RF >= 7.3")
654+
class TestArgumentTypeHints:
655+
def test_typed_arguments_define_untyped_lookup_variable(self) -> None:
656+
result = _run_analyzer(
657+
"""\
658+
*** Keywords ***
659+
K
660+
[Arguments] ${a: int}
661+
Log ${a}
662+
663+
*** Test Cases ***
664+
T
665+
K 1
666+
"""
667+
)
668+
669+
var_names = {v.name for v in result.variable_references}
670+
assert "${a}" in var_names
671+
672+
variable_not_found = [d for d in result.diagnostics if str(d.code) == "VariableNotFound"]
673+
assert len(variable_not_found) == 0
674+
675+
def test_typed_argument_reference_is_not_normalized_in_usage(self) -> None:
676+
result = _run_analyzer(
677+
"""\
678+
*** Keywords ***
679+
K
680+
[Arguments] ${a: int}
681+
Log ${a: int}
682+
683+
*** Test Cases ***
684+
T
685+
K 1
686+
"""
687+
)
688+
689+
assert any(str(d.code) == "VariableNotFound" and "${a: int}" in d.message for d in result.diagnostics)
690+
691+
def test_complex_typed_arguments_define_untyped_lookup_variable(self) -> None:
692+
result = _run_analyzer(
693+
"""\
694+
*** Keywords ***
695+
K
696+
[Arguments] ${a: Literal["abc", ":", ";"] | List[Literal[1,2,3]]}
697+
Log ${a}
698+
699+
*** Test Cases ***
700+
T
701+
K abc
702+
"""
703+
)
704+
705+
var_names = {v.name for v in result.variable_references}
706+
assert "${a}" in var_names
707+
assert not any(str(d.code) == "VariableNotFound" and "${a}" in d.message for d in result.diagnostics)
708+
709+
def test_complex_typed_argument_reference_is_not_normalized_in_usage(self) -> None:
710+
result = _run_analyzer(
711+
"""\
712+
*** Keywords ***
713+
K
714+
[Arguments] ${a: Literal["abc", ":", ";"] | List[Literal[1,2,3]]}
715+
Log ${a: Literal["abc", ":", ";"] | List[Literal[1,2,3]]}
716+
717+
*** Test Cases ***
718+
T
719+
K abc
720+
"""
721+
)
722+
723+
assert any(
724+
str(d.code) == "VariableNotFound" and '${a: Literal["abc", ":", ";"] | List[Literal[1,2,3]]}' in d.message
725+
for d in result.diagnostics
726+
)
727+
728+
651729
# --- Structural statement visitors (END, BREAK, CONTINUE, TRY, ELSE, FINALLY) ---
652730

653731

0 commit comments

Comments
 (0)