Python: New Feature: Adding Pandas DataFrame Type Introspection

---
name: Adding Introspection of Pandas DataFrame
about: Currently, when a pandas DataFrame type is used in kernel functions, registering the plugin with the kernel throws `NameError: name 'weakref' is not defined`. Adding support for introspection of the pandas DataFrame type would allow the kernel and the agents to execute functions that manipulate dataframes, and to return and share results among themselves in a structured fashion.

```
## function to get list of column names - wrapper to make pandas function available to the agent

def get_dataframe_columnnames(df: pd.DataFrame) -> List[str]:
    """Return the column labels of ``df`` as a plain list."""
    column_labels = [*df.columns]
    return column_labels

# function to get the unique values in a column - wrapper to make pandas function available to the agent

def get_distinct_column_values(df: pd.DataFrame, column_name: str) -> List[str]:
    """Return the unique values of column ``column_name`` in ``df`` as a list."""
    distinct_values = df[column_name].unique()
    return list(distinct_values)


class ColumnMatchingPlugin:
    """Plugin that exposes the dataframe column helpers as kernel functions."""

    @kernel_function(name="GetColumnNames", description="Get the list of column names in a dataframe")
    async def get_dataframe_columnnames(
        self,
        # NOTE: this pd.DataFrame annotation is what the report identifies as
        # the trigger for the NameError raised while the plugin is registered.
        df: Annotated[pd.DataFrame, 'the dataframe from which to extract the column names from'],
    ) -> List[str]:
        """Forward ``df`` to the module-level ``get_dataframe_columnnames`` helper."""
        column_names = get_dataframe_columnnames(df)
        return column_names

    @kernel_function(name="GetColumnValues", description="Get the list of values under a column ")
    async def get_distinct_column_values(self, df, column_name: str) -> List[str]:
        """Forward to the module-level ``get_distinct_column_values`` helper."""
        distinct_values = get_distinct_column_values(df, column_name)
        return distinct_values

## checking why plugin failed?

from semantic_kernel import Kernel

kernel = Kernel()
plugin = ColumnMatchingPlugin()
kernel.add_plugin(plugin, plugin_name="column_matching_plugin")  # raises NameError: name 'weakref' is not defined (traceback below)
```

gives the following error

```
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[73], line 7
      5 kernel = Kernel()
      6 plugin = ColumnMatchingPlugin()
----> 7 kernel.add_plugin(plugin, plugin_name="column_matching_plugin")

File /anaconda/envs/azureml_py38/lib/python3.10/site-packages/semantic_kernel/functions/kernel_function_extension.py:109, in KernelFunctionExtension.add_plugin(self, plugin, plugin_name, parent_directory, description, class_init_arguments, encoding)
    107     raise TypeError("plugin_name must be a string.")
    108 if plugin:
--> 109     self.plugins[plugin_name] = KernelPlugin.from_object(
    110         plugin_name=plugin_name, plugin_instance=plugin, description=description
    111     )
    112     if isinstance(plugin, AddToKernelCallbackProtocol):
    113         plugin.added_to_kernel(self)  # type: ignore

File /anaconda/envs/azureml_py38/lib/python3.10/site-packages/semantic_kernel/functions/kernel_plugin.py:243, in KernelPlugin.from_object(cls, plugin_name, plugin_instance, description)
    241     candidates.extend(inspect.getmembers(plugin_instance, inspect.iscoroutinefunction))  # type: ignore
    242 # Read every method from the plugin instance
--> 243 functions = [
    244     KernelFunctionFromMethod(method=candidate, plugin_name=plugin_name)
    245     for _, candidate in candidates
    246     if hasattr(candidate, "__kernel_function__")
    247 ]
    248 if not description:
    249     description = getattr(plugin_instance, "description", None)

File /anaconda/envs/azureml_py38/lib/python3.10/site-packages/semantic_kernel/functions/kernel_plugin.py:244, in <listcomp>(.0)
    241     candidates.extend(inspect.getmembers(plugin_instance, inspect.iscoroutinefunction))  # type: ignore
    242 # Read every method from the plugin instance
    243 functions = [
--> 244     KernelFunctionFromMethod(method=candidate, plugin_name=plugin_name)
    245     for _, candidate in candidates
    246     if hasattr(candidate, "__kernel_function__")
    247 ]
    248 if not description:
    249     description = getattr(plugin_instance, "description", None)

File /anaconda/envs/azureml_py38/lib/python3.10/site-packages/semantic_kernel/functions/kernel_function_from_method.py:57, in KernelFunctionFromMethod.__init__(self, method, plugin_name, stream_method, parameters, return_parameter, additional_metadata)
     55 description = method.__kernel_function_description__  # type: ignore
     56 if parameters is None:
---> 57     parameters = [KernelParameterMetadata(**param) for param in method.__kernel_function_parameters__]  # type: ignore
     58 if return_parameter is None:
     59     return_parameter = KernelParameterMetadata(
     60         name="return",
     61         description=method.__kernel_function_return_description__,  # type: ignore
   (...)
     65         is_required=method.__kernel_function_return_required__,  # type: ignore
     66     )

File /anaconda/envs/azureml_py38/lib/python3.10/site-packages/semantic_kernel/functions/kernel_function_from_method.py:57, in <listcomp>(.0)
     55 description = method.__kernel_function_description__  # type: ignore
     56 if parameters is None:
---> 57     parameters = [KernelParameterMetadata(**param) for param in method.__kernel_function_parameters__]  # type: ignore
     58 if return_parameter is None:
     59     return_parameter = KernelParameterMetadata(
     60         name="return",
     61         description=method.__kernel_function_return_description__,  # type: ignore
   (...)
     65         is_required=method.__kernel_function_return_required__,  # type: ignore
     66     )

    [... skipping hidden 1 frame]

File /anaconda/envs/azureml_py38/lib/python3.10/site-packages/semantic_kernel/functions/kernel_parameter_metadata.py:33, in KernelParameterMetadata.form_schema(cls, data)
     31     default_value = data.get("default_value", None)
     32     description = data.get("description", None)
---> 33     inferred_schema = cls.infer_schema(type_object, type_, default_value, description)
     34     data["schema_data"] = inferred_schema
     35 return data

File /anaconda/envs/azureml_py38/lib/python3.10/site-packages/semantic_kernel/functions/kernel_parameter_metadata.py:50, in KernelParameterMetadata.infer_schema(cls, type_object, parameter_type, default_value, description, structured_output)
     47 schema = None
     49 if type_object is not None:
---> 50     schema = KernelJsonSchemaBuilder.build(type_object, description, structured_output)
     51 elif parameter_type is not None:
     52     string_default = str(default_value) if default_value is not None else None

File /anaconda/envs/azureml_py38/lib/python3.10/site-packages/semantic_kernel/schema/kernel_json_schema_builder.py:58, in KernelJsonSchemaBuilder.build(cls, parameter_type, description, structured_output)
     56     return cls.build_enum_schema(parameter_type, description)
     57 if hasattr(parameter_type, "__annotations__"):
---> 58     return cls.build_model_schema(parameter_type, description, structured_output)
     59 if hasattr(parameter_type, "__args__"):
     60     return cls.handle_complex_type(parameter_type, description, structured_output)

File /anaconda/envs/azureml_py38/lib/python3.10/site-packages/semantic_kernel/schema/kernel_json_schema_builder.py:86, in KernelJsonSchemaBuilder.build_model_schema(cls, model, description, structured_output)
     83 required = []
     85 model_module_globals = vars(sys.modules[model.__module__])
---> 86 hints = get_type_hints(model, globalns=model_module_globals, localns={})
     88 for field_name, field_type in hints.items():
     89     field_description = None

File /anaconda/envs/azureml_py38/lib/python3.10/typing.py:1833, in get_type_hints(obj, globalns, localns, include_extras)
   1831         if isinstance(value, str):
   1832             value = ForwardRef(value, is_argument=False, is_class=True)
-> 1833         value = _eval_type(value, base_globals, base_locals)
   1834         hints[name] = value
   1835 return hints if include_extras else {k: _strip_annotations(t) for k, t in hints.items()}

File /anaconda/envs/azureml_py38/lib/python3.10/typing.py:327, in _eval_type(t, globalns, localns, recursive_guard)
    321 """Evaluate all forward references in the given type t.
    322 For use of globalns and localns see the docstring for get_type_hints().
    323 recursive_guard is used to prevent infinite recursion with a recursive
    324 ForwardRef.
    325 """
    326 if isinstance(t, ForwardRef):
--> 327     return t._evaluate(globalns, localns, recursive_guard)
    328 if isinstance(t, (_GenericAlias, GenericAlias, types.UnionType)):
    329     ev_args = tuple(_eval_type(a, globalns, localns, recursive_guard) for a in t.__args__)

File /anaconda/envs/azureml_py38/lib/python3.10/typing.py:694, in ForwardRef._evaluate(self, globalns, localns, recursive_guard)
    689 if self.__forward_module__ is not None:
    690     globalns = getattr(
    691         sys.modules.get(self.__forward_module__, None), '__dict__', globalns
    692     )
    693 type_ = _type_check(
--> 694     eval(self.__forward_code__, globalns, localns),
    695     "Forward references must evaluate to types.",
    696     is_argument=self.__forward_is_argument__,
    697     allow_special_forms=self.__forward_is_class__,
    698 )
    699 self.__forward_value__ = _eval_type(
    700     type_, globalns, localns, recursive_guard | {self.__forward_arg__}
    701 )
    702 self.__forward_evaluated__ = True

File <string>:1

NameError: name 'weakref' is not defined
```

When I change it to 

```
from typing import Any

class ColumnMatchingPlugin:
    """Workaround variant: ``df`` is annotated as ``Any`` instead of ``pd.DataFrame``."""

    @kernel_function(name="GetColumnNames", description="Get the list of column names in a dataframe")
    async def get_dataframe_columnnames(
        self,
        df: Annotated[Any, 'the dataframe from which to extract the column names from'],
    ) -> List[str]:
        """Forward ``df`` to the module-level ``get_dataframe_columnnames`` helper."""
        column_names = get_dataframe_columnnames(df)
        return column_names

    @kernel_function(name="GetColumnValues", description="Get the list of values under a column ")
    async def get_distinct_column_values(self, df, column_name: str) -> List[str]:
        """Forward to the module-level ``get_distinct_column_values`` helper."""
        distinct_values = get_distinct_column_values(df, column_name)
        return distinct_values

## checking why plugin failed?

from semantic_kernel import Kernel

kernel = Kernel()
plugin = ColumnMatchingPlugin()
kernel.add_plugin(plugin, plugin_name="column_matching_plugin")  # registers without error once df is annotated as Any
```

It gets registered, but in another plugin the functions need to return a dataframe:

```
# create the plugin for the data extraction agent

class DataExtractionPlugin:
    """Kernel functions for the data extraction agent.

    Each method forwards to a module-level helper of a similar name
    (``json_to_dataframe``, ``xml_to_dataframe``, ``csv_to_dataframe``,
    ``filter_dataframe``, ``dataframe_to_string``).
    """

    @kernel_function(name="ConvertJson", description="Extract the data from the JSON file in form of a dataframe")  # json to dataframe
    async def convert_json(self, filepath: str) -> pd.DataFrame:
        """Load the JSON file at ``filepath`` through ``json_to_dataframe``."""
        return json_to_dataframe(filepath=filepath)

    @kernel_function(name="ConvertXML", description="Extract the data from the XML file in form of a dataframe")  # xml to dataframe
    async def convert_xml(self, filepath: str) -> pd.DataFrame:
        """Load the XML file at ``filepath`` through ``xml_to_dataframe``."""
        return xml_to_dataframe(filepath=filepath)

    @kernel_function(name="ConvertCSV", description="Extract the data from the CSV file in form of a dataframe")  # csv to dataframe
    async def convert_csv(self, filepath: str) -> pd.DataFrame:
        """Load the CSV file at ``filepath`` through ``csv_to_dataframe``."""
        return csv_to_dataframe(filepath=filepath)

    @kernel_function(name="FilterDataframe", description="Find the files relevant to the query")  # query the dataframe
    async def filter_dataframe(self, df: pd.DataFrame, filter_dict: dict) -> pd.DataFrame:
        """Apply ``filter_dict`` to ``df`` through the module-level ``filter_dataframe``.

        ``self`` is required here: without it, calling the method on a plugin
        instance binds the instance to ``df`` and the call fails.
        """
        # Inside a method body the bare name resolves to the module-level
        # helper, not to this method, so this is not a recursive call.
        return filter_dataframe(df=df, filter_dict=filter_dict)

    # The description previously duplicated FilterDataframe's text; it now
    # states what this function does so the two can be told apart.
    @kernel_function(name="ConvertDataframe", description="Convert the dataframe to a string")  # dataframe to string
    async def dataframe_to_string(self, df: pd.DataFrame) -> str:
        """Render ``df`` as a string through the module-level ``dataframe_to_string``."""
        return dataframe_to_string(df=df)
```
The task is to return a dataframe so that all the dataframes can be combined and analysed. Being able to parse the `pd.DataFrame` type would give more structured output and better handling of dataframes than using `Any` and annotating the return type.

---

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Python: New Feature: Adding Pandas DataFrame Type Introspection #13308

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Python: New Feature: Adding Pandas DataFrame Type Introspection #13308

Description

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue actions