Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion pyathena/arrow/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,4 +115,5 @@ def __init__(self) -> None:
)

def convert(self, type_: str, value: str | None, type_hint: str | None = None) -> Any | None:
    """Convert ``value`` using the converter looked up for ``type_``.

    Args:
        type_: Type name passed to ``self.get`` to select a converter.
        value: Raw value handed to the selected converter.
        type_hint: Part of the signature; not used by this method.

    Returns:
        Whatever the selected converter returns for ``value``.
    """
    return self.get(type_)(value)
28 changes: 25 additions & 3 deletions pyathena/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,20 @@
import binascii
import json
import logging
import re
from abc import ABCMeta, abstractmethod
from collections.abc import Callable
from copy import deepcopy
from datetime import date, datetime, time
from decimal import Decimal
from typing import Any
from typing import Any, ClassVar

from dateutil.tz import gettz

from pyathena.parser import (
TypedValueConverter,
TypeNode,
TypeSignatureParser,
_normalize_hive_syntax,
_split_array_items,
)
from pyathena.util import strtobool
Expand Down Expand Up @@ -551,6 +551,9 @@ class DefaultTypeConverter(Converter):
['1', '2', '3']
"""

_HIVE_SYNTAX_RE: ClassVar[re.Pattern[str]] = re.compile(r"[<>:]")
_HIVE_REPLACEMENTS: ClassVar[dict[str, str]] = {"<": "(", ">": ")", ":": " "}

def __init__(self) -> None:
super().__init__(mappings=deepcopy(_DEFAULT_CONVERTERS), default=_to_default)
self._parser = TypeSignatureParser()
Expand All @@ -561,6 +564,25 @@ def __init__(self) -> None:
)
self._parsed_hints: dict[str, TypeNode] = {}

@staticmethod
def _normalize_hive_syntax(type_str: str) -> str:
    """Rewrite a Hive-style type signature into Trino-style notation.

    Hive DDL uses angle brackets and colons (``array<struct<a:int>>``);
    the result uses parentheses and spaces (``array(struct(a int))``).

    Args:
        type_str: Type signature string, possibly written in Hive syntax.

    Returns:
        The signature with ``<``, ``>`` and ``:`` replaced, or ``type_str``
        itself when it contains no ``<``.
    """
    owner = DefaultTypeConverter
    if "<" in type_str:

        def _replacement(match: re.Match[str]) -> str:
            # Each matched delimiter maps to its Trino-style counterpart.
            return owner._HIVE_REPLACEMENTS[match.group()]

        return owner._HIVE_SYNTAX_RE.sub(_replacement, type_str)
    # No opening angle bracket: treated as already normalized.
    return type_str

def convert(self, type_: str, value: str | None, type_hint: str | None = None) -> Any | None:
"""Convert a string value to the appropriate Python type.

Expand Down Expand Up @@ -605,7 +627,7 @@ def _parse_type_hint(self, type_hint: str) -> TypeNode:
Returns:
Parsed TypeNode.
"""
normalized = _normalize_hive_syntax(type_hint)
normalized = self._normalize_hive_syntax(type_hint)
if normalized not in self._parsed_hints:
self._parsed_hints[normalized] = self._parser.parse(normalized)
return self._parsed_hints[normalized]
6 changes: 4 additions & 2 deletions pyathena/pandas/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,8 @@ def _dtypes(self) -> dict[str, type[Any]]:
return self.__dtypes

def convert(self, type_: str, value: str | None, type_hint: str | None = None) -> Any | None:
    """Apply the converter selected for ``type_`` to ``value``.

    Args:
        type_: Type name used with ``self.get`` to pick the converter.
        value: Raw value to convert.
        type_hint: Present in the signature but not consulted here.

    Returns:
        The result of calling the selected converter on ``value``.
    """
    return self.get(type_)(value)


class DefaultPandasUnloadTypeConverter(Converter):
Expand All @@ -104,4 +105,5 @@ def __init__(self) -> None:
)

def convert(self, type_: str, value: str | None, type_hint: str | None = None) -> Any | None:
    """Dispatch ``value`` to the converter that ``self.get`` returns for ``type_``.

    Args:
        type_: Type name used to look up the converter.
        value: Raw value passed to that converter.
        type_hint: Part of the signature; ignored by this method.

    Returns:
        The selected converter's result for ``value``.
    """
    return self.get(type_)(value)
23 changes: 0 additions & 23 deletions pyathena/parser.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from __future__ import annotations

import json
import re
from collections.abc import Callable
from dataclasses import dataclass, field
from typing import Any
Expand All @@ -11,28 +10,6 @@
"int": "integer",
}

# Pattern for normalizing Hive-style type signatures to Trino-style.
# Matches angle brackets and colons used in Hive DDL (e.g., array<struct<a:int>>).
_HIVE_SYNTAX_RE: re.Pattern[str] = re.compile(r"[<>:]")
_HIVE_REPLACEMENTS: dict[str, str] = {"<": "(", ">": ")", ":": " "}


def _normalize_hive_syntax(type_str: str) -> str:
"""Normalize Hive-style DDL syntax to Trino-style.

Converts angle-bracket notation (``array<struct<a:int>>``) to
parenthesized notation (``array(struct(a int))``).

Args:
type_str: Type signature string, possibly using Hive syntax.

Returns:
Normalized type signature using Trino-style parenthesized notation.
"""
if "<" not in type_str:
return type_str
return _HIVE_SYNTAX_RE.sub(lambda m: _HIVE_REPLACEMENTS[m.group()], type_str)


def _split_array_items(inner: str) -> list[str]:
"""Split array items by comma, respecting brace and bracket groupings.
Expand Down
3 changes: 2 additions & 1 deletion pyathena/polars/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,4 +128,5 @@ def __init__(self) -> None:
)

def convert(self, type_: str, value: str | None, type_hint: str | None = None) -> Any | None:
    """Look up the converter for ``type_`` and call it with ``value``.

    Args:
        type_: Type name given to ``self.get``.
        value: Raw value to convert.
        type_hint: Accepted in the signature; unused here.

    Returns:
        The value produced by the looked-up converter.
    """
    return self.get(type_)(value)
8 changes: 8 additions & 0 deletions tests/pyathena/arrow/test_converter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from pyathena.arrow.converter import DefaultArrowUnloadTypeConverter


class TestDefaultArrowUnloadTypeConverter:
    def test_convert_delegates_to_default(self):
        """convert() returns the input string for a varchar value, not None."""
        unload_converter = DefaultArrowUnloadTypeConverter()
        converted = unload_converter.convert("varchar", "hello")
        assert converted == "hello"
25 changes: 25 additions & 0 deletions tests/pyathena/pandas/test_converter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
from pyathena.pandas.converter import (
DefaultPandasTypeConverter,
DefaultPandasUnloadTypeConverter,
)


class TestDefaultPandasTypeConverter:
    def test_convert_delegates_to_mapping(self):
        """convert() produces real results instead of returning None.

        Checks a boolean value, a varchar value, and that a ``None``
        varchar value comes back as ``None``.
        """
        pandas_converter = DefaultPandasTypeConverter()

        as_bool = pandas_converter.convert("boolean", "true")
        assert as_bool is True

        as_text = pandas_converter.convert("varchar", "hello")
        assert as_text == "hello"

        as_null = pandas_converter.convert("varchar", None)
        assert as_null is None


class TestDefaultPandasUnloadTypeConverter:
    def test_convert_delegates_to_default(self):
        """convert() returns the input string for a varchar value, not None."""
        unload_converter = DefaultPandasUnloadTypeConverter()
        converted = unload_converter.convert("varchar", "hello")
        assert converted == "hello"
8 changes: 8 additions & 0 deletions tests/pyathena/polars/test_converter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from pyathena.polars.converter import DefaultPolarsUnloadTypeConverter


class TestDefaultPolarsUnloadTypeConverter:
    def test_convert_delegates_to_default(self):
        """convert() returns the input string for a varchar value, not None."""
        unload_converter = DefaultPolarsUnloadTypeConverter()
        converted = unload_converter.convert("varchar", "hello")
        assert converted == "hello"
47 changes: 47 additions & 0 deletions tests/pyathena/test_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -408,3 +408,50 @@ def test_hive_syntax_caching(self):
# Both should normalize to "array(integer)" in the cache
assert "array(integer)" in converter._parsed_hints
assert len(converter._parsed_hints) == 1

def test_normalize_hive_syntax_noop(self):
    """A signature already in Trino style is returned unchanged."""
    normalized = DefaultTypeConverter._normalize_hive_syntax("array(integer)")
    assert normalized == "array(integer)"

def test_normalize_hive_syntax_replaces(self):
    """Angle brackets and colons are rewritten to parentheses and spaces."""
    normalized = DefaultTypeConverter._normalize_hive_syntax("array<struct<a:int>>")
    assert normalized == "array(struct(a int))"

def test_normalize_hive_syntax_struct(self):
    """A Hive-style struct hint converts a row value into a typed dict."""
    hive_hint = "struct<name:varchar,age:int>"
    expected = {"name": "Alice", "age": 25}
    actual = DefaultTypeConverter().convert("row", "{name=Alice, age=25}", type_hint=hive_hint)
    assert actual == expected

def test_normalize_hive_syntax_nested(self):
    """A Hive-style array-of-struct hint converts each element into a typed dict."""
    hive_hint = "array<struct<a:int,b:varchar>>"
    raw_value = "[{a=1, b=hello}, {a=2, b=world}]"
    expected = [{"a": 1, "b": "hello"}, {"a": 2, "b": "world"}]
    actual = DefaultTypeConverter().convert("array", raw_value, type_hint=hive_hint)
    assert actual == expected

def test_normalize_hive_syntax_map(self):
    """A Hive-style map hint converts a JSON-like map value into a dict."""
    hive_hint = "map<string,int>"
    raw_value = '{"x": 1, "y": 2}'
    expected = {"x": 1, "y": 2}
    actual = DefaultTypeConverter().convert("map", raw_value, type_hint=hive_hint)
    assert actual == expected

def test_normalize_hive_syntax_mixed(self):
    """Hive angle brackets around a Trino-style parenthesized inner type."""
    mixed_hint = "array<row(a int, b varchar)>"
    expected = [{"a": 1, "b": "hello"}]
    actual = DefaultTypeConverter().convert("array", "[{a=1, b=hello}]", type_hint=mixed_hint)
    assert actual == expected
50 changes: 0 additions & 50 deletions tests/pyathena/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
TypedValueConverter,
TypeNode,
TypeSignatureParser,
_normalize_hive_syntax,
)


Expand Down Expand Up @@ -109,55 +108,6 @@ def test_type_alias_in_complex_type(self):
assert node.type_name == "array"
assert node.children[0].type_name == "integer"

def test_hive_syntax_simple(self):
    """A normalized Hive array signature parses to an array of integer."""
    signature_parser = TypeSignatureParser()
    array_node = signature_parser.parse(_normalize_hive_syntax("array<int>"))
    assert array_node.type_name == "array"
    element_node = array_node.children[0]
    assert element_node.type_name == "integer"

def test_hive_syntax_struct(self):
    """A normalized Hive struct signature parses with field names and child types."""
    signature_parser = TypeSignatureParser()
    struct_node = signature_parser.parse(_normalize_hive_syntax("struct<a:int,b:varchar>"))
    assert struct_node.type_name == "struct"
    assert struct_node.field_names == ["a", "b"]
    for position, expected_type in enumerate(("integer", "varchar")):
        assert struct_node.children[position].type_name == expected_type

def test_hive_syntax_nested(self):
    """A normalized Hive array-of-struct signature parses into nested nodes."""
    signature_parser = TypeSignatureParser()
    array_node = signature_parser.parse(_normalize_hive_syntax("array<struct<a:int,b:varchar>>"))
    assert array_node.type_name == "array"
    inner = array_node.children[0]
    assert inner.type_name == "struct"
    assert inner.field_names == ["a", "b"]
    for position, expected_type in enumerate(("integer", "varchar")):
        assert inner.children[position].type_name == expected_type

def test_hive_syntax_map(self):
    """A normalized Hive map signature parses with key and value child types."""
    signature_parser = TypeSignatureParser()
    map_node = signature_parser.parse(_normalize_hive_syntax("map<string,int>"))
    assert map_node.type_name == "map"
    for position, expected_type in enumerate(("string", "integer")):
        assert map_node.children[position].type_name == expected_type

def test_mixed_syntax(self):
    """Hive angle brackets around a Trino-style parenthesized inner type."""
    signature_parser = TypeSignatureParser()
    array_node = signature_parser.parse(_normalize_hive_syntax("array<row(a int, b varchar)>"))
    assert array_node.type_name == "array"
    inner = array_node.children[0]
    assert inner.type_name == "row"
    assert inner.field_names == ["a", "b"]
    for position, expected_type in enumerate(("integer", "varchar")):
        assert inner.children[position].type_name == expected_type

def test_normalize_hive_syntax_noop(self):
    """A signature already in Trino style is returned unchanged."""
    normalized = _normalize_hive_syntax("array(integer)")
    assert normalized == "array(integer)"

def test_normalize_hive_syntax_replaces(self):
    """Angle brackets and colons are rewritten to parentheses and spaces."""
    normalized = _normalize_hive_syntax("array<struct<a:int>>")
    assert normalized == "array(struct(a int))"

def test_trailing_modifier_after_paren(self):
"""Type with content after closing paren should not break parsing."""
parser = TypeSignatureParser()
Expand Down