diff --git a/pyathena/arrow/converter.py b/pyathena/arrow/converter.py index 26774186..cd9c5589 100644 --- a/pyathena/arrow/converter.py +++ b/pyathena/arrow/converter.py @@ -115,4 +115,5 @@ def __init__(self) -> None: ) def convert(self, type_: str, value: str | None, type_hint: str | None = None) -> Any | None: - pass + converter = self.get(type_) + return converter(value) diff --git a/pyathena/converter.py b/pyathena/converter.py index 26c43f77..7432d5b9 100644 --- a/pyathena/converter.py +++ b/pyathena/converter.py @@ -3,12 +3,13 @@ import binascii import json import logging +import re from abc import ABCMeta, abstractmethod from collections.abc import Callable from copy import deepcopy from datetime import date, datetime, time from decimal import Decimal -from typing import Any +from typing import Any, ClassVar from dateutil.tz import gettz @@ -16,7 +17,6 @@ TypedValueConverter, TypeNode, TypeSignatureParser, - _normalize_hive_syntax, _split_array_items, ) from pyathena.util import strtobool @@ -551,6 +551,9 @@ class DefaultTypeConverter(Converter): ['1', '2', '3'] """ + _HIVE_SYNTAX_RE: ClassVar[re.Pattern[str]] = re.compile(r"[<>:]") + _HIVE_REPLACEMENTS: ClassVar[dict[str, str]] = {"<": "(", ">": ")", ":": " "} + def __init__(self) -> None: super().__init__(mappings=deepcopy(_DEFAULT_CONVERTERS), default=_to_default) self._parser = TypeSignatureParser() @@ -561,6 +564,25 @@ def __init__(self) -> None: ) self._parsed_hints: dict[str, TypeNode] = {} + @staticmethod + def _normalize_hive_syntax(type_str: str) -> str: + """Normalize Hive-style DDL syntax to Trino-style. + + Converts angle-bracket notation (``array<struct<a:int>>``) to + parenthesized notation (``array(struct(a int))``). + + Args: + type_str: Type signature string, possibly using Hive syntax. + + Returns: + Normalized type signature using Trino-style parenthesized notation. 
+ """ + if "<" not in type_str: + return type_str + return DefaultTypeConverter._HIVE_SYNTAX_RE.sub( + lambda m: DefaultTypeConverter._HIVE_REPLACEMENTS[m.group()], type_str + ) + def convert(self, type_: str, value: str | None, type_hint: str | None = None) -> Any | None: """Convert a string value to the appropriate Python type. @@ -605,7 +627,7 @@ def _parse_type_hint(self, type_hint: str) -> TypeNode: Returns: Parsed TypeNode. """ - normalized = _normalize_hive_syntax(type_hint) + normalized = self._normalize_hive_syntax(type_hint) if normalized not in self._parsed_hints: self._parsed_hints[normalized] = self._parser.parse(normalized) return self._parsed_hints[normalized] diff --git a/pyathena/pandas/converter.py b/pyathena/pandas/converter.py index 29519bb3..87417a87 100644 --- a/pyathena/pandas/converter.py +++ b/pyathena/pandas/converter.py @@ -81,7 +81,8 @@ def _dtypes(self) -> dict[str, type[Any]]: return self.__dtypes def convert(self, type_: str, value: str | None, type_hint: str | None = None) -> Any | None: - pass + converter = self.get(type_) + return converter(value) class DefaultPandasUnloadTypeConverter(Converter): @@ -104,4 +105,5 @@ def __init__(self) -> None: ) def convert(self, type_: str, value: str | None, type_hint: str | None = None) -> Any | None: - pass + converter = self.get(type_) + return converter(value) diff --git a/pyathena/parser.py b/pyathena/parser.py index c6b70eb8..e263ed53 100644 --- a/pyathena/parser.py +++ b/pyathena/parser.py @@ -1,7 +1,6 @@ from __future__ import annotations import json -import re from collections.abc import Callable from dataclasses import dataclass, field from typing import Any @@ -11,28 +10,6 @@ "int": "integer", } -# Pattern for normalizing Hive-style type signatures to Trino-style. -# Matches angle brackets and colons used in Hive DDL (e.g., array<struct<a:int>>). 
-_HIVE_SYNTAX_RE: re.Pattern[str] = re.compile(r"[<>:]") -_HIVE_REPLACEMENTS: dict[str, str] = {"<": "(", ">": ")", ":": " "} - - -def _normalize_hive_syntax(type_str: str) -> str: - """Normalize Hive-style DDL syntax to Trino-style. - - Converts angle-bracket notation (``array<struct<a:int>>``) to - parenthesized notation (``array(struct(a int))``). - - Args: - type_str: Type signature string, possibly using Hive syntax. - - Returns: - Normalized type signature using Trino-style parenthesized notation. - """ - if "<" not in type_str: - return type_str - return _HIVE_SYNTAX_RE.sub(lambda m: _HIVE_REPLACEMENTS[m.group()], type_str) - def _split_array_items(inner: str) -> list[str]: """Split array items by comma, respecting brace and bracket groupings. diff --git a/pyathena/polars/converter.py b/pyathena/polars/converter.py index 356deaf2..7f52c28d 100644 --- a/pyathena/polars/converter.py +++ b/pyathena/polars/converter.py @@ -128,4 +128,5 @@ def __init__(self) -> None: ) def convert(self, type_: str, value: str | None, type_hint: str | None = None) -> Any | None: - pass + converter = self.get(type_) + return converter(value) diff --git a/tests/pyathena/arrow/test_converter.py b/tests/pyathena/arrow/test_converter.py new file mode 100644 index 00000000..ad37a3ea --- /dev/null +++ b/tests/pyathena/arrow/test_converter.py @@ -0,0 +1,8 @@ +from pyathena.arrow.converter import DefaultArrowUnloadTypeConverter + + +class TestDefaultArrowUnloadTypeConverter: + def test_convert_delegates_to_default(self): + """convert() dispatches through the default converter instead of returning None.""" + converter = DefaultArrowUnloadTypeConverter() + assert converter.convert("varchar", "hello") == "hello" diff --git a/tests/pyathena/pandas/test_converter.py b/tests/pyathena/pandas/test_converter.py new file mode 100644 index 00000000..b27d7771 --- /dev/null +++ b/tests/pyathena/pandas/test_converter.py @@ -0,0 +1,25 @@ +from pyathena.pandas.converter import ( + DefaultPandasTypeConverter, + 
DefaultPandasUnloadTypeConverter, +) + + +class TestDefaultPandasTypeConverter: + def test_convert_delegates_to_mapping(self): + """convert() dispatches through self.get(type_) instead of returning None. + + Verifies both the explicit mapping path (boolean → _to_boolean) + and the default converter path (varchar → _to_default), plus + None passthrough. + """ + converter = DefaultPandasTypeConverter() + assert converter.convert("boolean", "true") is True + assert converter.convert("varchar", "hello") == "hello" + assert converter.convert("varchar", None) is None + + +class TestDefaultPandasUnloadTypeConverter: + def test_convert_delegates_to_default(self): + """convert() dispatches through the default converter instead of returning None.""" + converter = DefaultPandasUnloadTypeConverter() + assert converter.convert("varchar", "hello") == "hello" diff --git a/tests/pyathena/polars/test_converter.py b/tests/pyathena/polars/test_converter.py new file mode 100644 index 00000000..6435fe64 --- /dev/null +++ b/tests/pyathena/polars/test_converter.py @@ -0,0 +1,8 @@ +from pyathena.polars.converter import DefaultPolarsUnloadTypeConverter + + +class TestDefaultPolarsUnloadTypeConverter: + def test_convert_delegates_to_default(self): + """convert() dispatches through the default converter instead of returning None.""" + converter = DefaultPolarsUnloadTypeConverter() + assert converter.convert("varchar", "hello") == "hello" diff --git a/tests/pyathena/test_converter.py b/tests/pyathena/test_converter.py index 39583a24..1efeb8da 100644 --- a/tests/pyathena/test_converter.py +++ b/tests/pyathena/test_converter.py @@ -408,3 +408,50 @@ def test_hive_syntax_caching(self): # Both should normalize to "array(integer)" in the cache assert "array(integer)" in converter._parsed_hints assert len(converter._parsed_hints) == 1 + + def test_normalize_hive_syntax_noop(self): + """Trino-style input passes through unchanged.""" + assert 
DefaultTypeConverter._normalize_hive_syntax("array(integer)") == "array(integer)" + + def test_normalize_hive_syntax_replaces(self): + assert ( + DefaultTypeConverter._normalize_hive_syntax("array>") + == "array(struct(a int))" + ) + + def test_normalize_hive_syntax_struct(self): + converter = DefaultTypeConverter() + result = converter.convert( + "row", + "{name=Alice, age=25}", + type_hint="struct", + ) + assert result == {"name": "Alice", "age": 25} + + def test_normalize_hive_syntax_nested(self): + converter = DefaultTypeConverter() + result = converter.convert( + "array", + "[{a=1, b=hello}, {a=2, b=world}]", + type_hint="array>", + ) + assert result == [{"a": 1, "b": "hello"}, {"a": 2, "b": "world"}] + + def test_normalize_hive_syntax_map(self): + converter = DefaultTypeConverter() + result = converter.convert( + "map", + '{"x": 1, "y": 2}', + type_hint="map", + ) + assert result == {"x": 1, "y": 2} + + def test_normalize_hive_syntax_mixed(self): + """Hive angle brackets wrapping Trino-style parenthesized inner type.""" + converter = DefaultTypeConverter() + result = converter.convert( + "array", + "[{a=1, b=hello}]", + type_hint="array", + ) + assert result == [{"a": 1, "b": "hello"}] diff --git a/tests/pyathena/test_parser.py b/tests/pyathena/test_parser.py index bd6ab9d7..27d6d431 100644 --- a/tests/pyathena/test_parser.py +++ b/tests/pyathena/test_parser.py @@ -5,7 +5,6 @@ TypedValueConverter, TypeNode, TypeSignatureParser, - _normalize_hive_syntax, ) @@ -109,55 +108,6 @@ def test_type_alias_in_complex_type(self): assert node.type_name == "array" assert node.children[0].type_name == "integer" - def test_hive_syntax_simple(self): - parser = TypeSignatureParser() - node = parser.parse(_normalize_hive_syntax("array")) - assert node.type_name == "array" - assert node.children[0].type_name == "integer" - - def test_hive_syntax_struct(self): - parser = TypeSignatureParser() - node = parser.parse(_normalize_hive_syntax("struct")) - assert node.type_name == 
"struct" - assert node.field_names == ["a", "b"] - assert node.children[0].type_name == "integer" - assert node.children[1].type_name == "varchar" - - def test_hive_syntax_nested(self): - parser = TypeSignatureParser() - node = parser.parse(_normalize_hive_syntax("array>")) - assert node.type_name == "array" - struct_node = node.children[0] - assert struct_node.type_name == "struct" - assert struct_node.field_names == ["a", "b"] - assert struct_node.children[0].type_name == "integer" - assert struct_node.children[1].type_name == "varchar" - - def test_hive_syntax_map(self): - parser = TypeSignatureParser() - node = parser.parse(_normalize_hive_syntax("map")) - assert node.type_name == "map" - assert node.children[0].type_name == "string" - assert node.children[1].type_name == "integer" - - def test_mixed_syntax(self): - """Hive angle brackets wrapping Trino-style parenthesized inner type.""" - parser = TypeSignatureParser() - node = parser.parse(_normalize_hive_syntax("array")) - assert node.type_name == "array" - row_node = node.children[0] - assert row_node.type_name == "row" - assert row_node.field_names == ["a", "b"] - assert row_node.children[0].type_name == "integer" - assert row_node.children[1].type_name == "varchar" - - def test_normalize_hive_syntax_noop(self): - """Trino-style input passes through unchanged.""" - assert _normalize_hive_syntax("array(integer)") == "array(integer)" - - def test_normalize_hive_syntax_replaces(self): - assert _normalize_hive_syntax("array>") == "array(struct(a int))" - def test_trailing_modifier_after_paren(self): """Type with content after closing paren should not break parsing.""" parser = TypeSignatureParser()