Skip to content

Python: New Feature: Adding Pandas DataFrame Type Introspection #13308

@bankaboy

Description

@bankaboy

name: Adding Introspection of Pandas DataFrame
about: Currently, when a pandas DataFrame type is used in a kernel function signature, registering the plugin with the kernel throws `NameError: name 'weakref' is not defined`. Adding support for introspection of the pandas DataFrame type would allow the kernel and the agents to execute functions that manipulate dataframes, and to return and share the results among themselves in a structured fashion.

## function to get list of column names - wrapper to make pandas function available to the agent

def get_dataframe_columnnames(df: pd.DataFrame) -> List[str]:
    """Return the column labels of ``df`` as a plain Python list.

    Args:
        df: The dataframe whose column labels are collected.

    Returns:
        The labels from ``df.columns``, in their existing order.
    """
    column_labels = [label for label in df.columns]
    return column_labels

# function to get the unique values in a column - wrapper to make pandas function available to the agent

def get_distinct_column_values(df: pd.DataFrame, column_name: str) -> List[str]:
    """Return the distinct values found in one column of ``df``.

    Args:
        df: The dataframe to read from.
        column_name: Label of the column whose values are collected.

    Returns:
        A list of the values produced by ``Series.unique()`` for that column.
    """
    distinct_values = df[column_name].unique()
    return [value for value in distinct_values]


# Plugin class exposing two async methods decorated with @kernel_function.
# Each method delegates to the module-level helper function of the same name.
class ColumnMatchingPlugin:
    # NOTE: `df` is annotated with pd.DataFrame here. This is the variant that
    # the report says fails on registration with
    # "NameError: name 'weakref' is not defined".
    @kernel_function(name="GetColumnNames", description="Get the list of column names in a dataframe")
    async def get_dataframe_columnnames(self, df: Annotated[pd.DataFrame, 'the dataframe from which to extract the column names from']) -> List[str]:
        # Inside a method body the class namespace is not searched, so this
        # name resolves to the module-level function, not to this method.
        return get_dataframe_columnnames(df)

    # `df` carries no type annotation in this method.
    @kernel_function(name="GetColumnValues", description="Get the list of values under a column ")
    async def get_distinct_column_values(self, df, column_name: str) -> List[str]:
        # Resolves to the module-level function of the same name (see above).
        return get_distinct_column_values(df, column_name)

## checking why plugin failed?

from semantic_kernel import Kernel

# Create a kernel and a plugin instance, then register the plugin under the
# name "column_matching_plugin".
kernel = Kernel()
plugin = ColumnMatchingPlugin()
# This is the call at which the reported NameError is raised (see traceback).
kernel.add_plugin(plugin, plugin_name="column_matching_plugin")

Registering the plugin gives the following error:

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[73], line 7
      5 kernel = Kernel()
      6 plugin = ColumnMatchingPlugin()
----> 7 kernel.add_plugin(plugin, plugin_name="column_matching_plugin")

File /anaconda/envs/azureml_py38/lib/python3.10/site-packages/semantic_kernel/functions/kernel_function_extension.py:109, in KernelFunctionExtension.add_plugin(self, plugin, plugin_name, parent_directory, description, class_init_arguments, encoding)
    107     raise TypeError("plugin_name must be a string.")
    108 if plugin:
--> 109     self.plugins[plugin_name] = KernelPlugin.from_object(
    110         plugin_name=plugin_name, plugin_instance=plugin, description=description
    111     )
    112     if isinstance(plugin, AddToKernelCallbackProtocol):
    113         plugin.added_to_kernel(self)  # type: ignore

File /anaconda/envs/azureml_py38/lib/python3.10/site-packages/semantic_kernel/functions/kernel_plugin.py:243, in KernelPlugin.from_object(cls, plugin_name, plugin_instance, description)
    241     candidates.extend(inspect.getmembers(plugin_instance, inspect.iscoroutinefunction))  # type: ignore
    242 # Read every method from the plugin instance
--> 243 functions = [
    244     KernelFunctionFromMethod(method=candidate, plugin_name=plugin_name)
    245     for _, candidate in candidates
    246     if hasattr(candidate, "__kernel_function__")
    247 ]
    248 if not description:
    249     description = getattr(plugin_instance, "description", None)

File /anaconda/envs/azureml_py38/lib/python3.10/site-packages/semantic_kernel/functions/kernel_plugin.py:244, in <listcomp>(.0)
    241     candidates.extend(inspect.getmembers(plugin_instance, inspect.iscoroutinefunction))  # type: ignore
    242 # Read every method from the plugin instance
    243 functions = [
--> 244     KernelFunctionFromMethod(method=candidate, plugin_name=plugin_name)
    245     for _, candidate in candidates
    246     if hasattr(candidate, "__kernel_function__")
    247 ]
    248 if not description:
    249     description = getattr(plugin_instance, "description", None)

File /anaconda/envs/azureml_py38/lib/python3.10/site-packages/semantic_kernel/functions/kernel_function_from_method.py:57, in KernelFunctionFromMethod.__init__(self, method, plugin_name, stream_method, parameters, return_parameter, additional_metadata)
     55 description = method.__kernel_function_description__  # type: ignore
     56 if parameters is None:
---> 57     parameters = [KernelParameterMetadata(**param) for param in method.__kernel_function_parameters__]  # type: ignore
     58 if return_parameter is None:
     59     return_parameter = KernelParameterMetadata(
     60         name="return",
     61         description=method.__kernel_function_return_description__,  # type: ignore
   (...)
     65         is_required=method.__kernel_function_return_required__,  # type: ignore
     66     )

File /anaconda/envs/azureml_py38/lib/python3.10/site-packages/semantic_kernel/functions/kernel_function_from_method.py:57, in <listcomp>(.0)
     55 description = method.__kernel_function_description__  # type: ignore
     56 if parameters is None:
---> 57     parameters = [KernelParameterMetadata(**param) for param in method.__kernel_function_parameters__]  # type: ignore
     58 if return_parameter is None:
     59     return_parameter = KernelParameterMetadata(
     60         name="return",
     61         description=method.__kernel_function_return_description__,  # type: ignore
   (...)
     65         is_required=method.__kernel_function_return_required__,  # type: ignore
     66     )

    [... skipping hidden 1 frame]

File /anaconda/envs/azureml_py38/lib/python3.10/site-packages/semantic_kernel/functions/kernel_parameter_metadata.py:33, in KernelParameterMetadata.form_schema(cls, data)
     31     default_value = data.get("default_value", None)
     32     description = data.get("description", None)
---> 33     inferred_schema = cls.infer_schema(type_object, type_, default_value, description)
     34     data["schema_data"] = inferred_schema
     35 return data

File /anaconda/envs/azureml_py38/lib/python3.10/site-packages/semantic_kernel/functions/kernel_parameter_metadata.py:50, in KernelParameterMetadata.infer_schema(cls, type_object, parameter_type, default_value, description, structured_output)
     47 schema = None
     49 if type_object is not None:
---> 50     schema = KernelJsonSchemaBuilder.build(type_object, description, structured_output)
     51 elif parameter_type is not None:
     52     string_default = str(default_value) if default_value is not None else None

File /anaconda/envs/azureml_py38/lib/python3.10/site-packages/semantic_kernel/schema/kernel_json_schema_builder.py:58, in KernelJsonSchemaBuilder.build(cls, parameter_type, description, structured_output)
     56     return cls.build_enum_schema(parameter_type, description)
     57 if hasattr(parameter_type, "__annotations__"):
---> 58     return cls.build_model_schema(parameter_type, description, structured_output)
     59 if hasattr(parameter_type, "__args__"):
     60     return cls.handle_complex_type(parameter_type, description, structured_output)

File /anaconda/envs/azureml_py38/lib/python3.10/site-packages/semantic_kernel/schema/kernel_json_schema_builder.py:86, in KernelJsonSchemaBuilder.build_model_schema(cls, model, description, structured_output)
     83 required = []
     85 model_module_globals = vars(sys.modules[model.__module__])
---> 86 hints = get_type_hints(model, globalns=model_module_globals, localns={})
     88 for field_name, field_type in hints.items():
     89     field_description = None

File /anaconda/envs/azureml_py38/lib/python3.10/typing.py:1833, in get_type_hints(obj, globalns, localns, include_extras)
   1831         if isinstance(value, str):
   1832             value = ForwardRef(value, is_argument=False, is_class=True)
-> 1833         value = _eval_type(value, base_globals, base_locals)
   1834         hints[name] = value
   1835 return hints if include_extras else {k: _strip_annotations(t) for k, t in hints.items()}

File /anaconda/envs/azureml_py38/lib/python3.10/typing.py:327, in _eval_type(t, globalns, localns, recursive_guard)
    321 """Evaluate all forward references in the given type t.
    322 For use of globalns and localns see the docstring for get_type_hints().
    323 recursive_guard is used to prevent infinite recursion with a recursive
    324 ForwardRef.
    325 """
    326 if isinstance(t, ForwardRef):
--> 327     return t._evaluate(globalns, localns, recursive_guard)
    328 if isinstance(t, (_GenericAlias, GenericAlias, types.UnionType)):
    329     ev_args = tuple(_eval_type(a, globalns, localns, recursive_guard) for a in t.__args__)

File /anaconda/envs/azureml_py38/lib/python3.10/typing.py:694, in ForwardRef._evaluate(self, globalns, localns, recursive_guard)
    689 if self.__forward_module__ is not None:
    690     globalns = getattr(
    691         sys.modules.get(self.__forward_module__, None), '__dict__', globalns
    692     )
    693 type_ = _type_check(
--> 694     eval(self.__forward_code__, globalns, localns),
    695     "Forward references must evaluate to types.",
    696     is_argument=self.__forward_is_argument__,
    697     allow_special_forms=self.__forward_is_class__,
    698 )
    699 self.__forward_value__ = _eval_type(
    700     type_, globalns, localns, recursive_guard | {self.__forward_arg__}
    701 )
    702 self.__forward_evaluated__ = True

File <string>:1

NameError: name 'weakref' is not defined

When I change the `df` annotation from `pd.DataFrame` to `Any`, as follows:

from typing import Any

# Workaround variant of the plugin: identical to the failing version except
# that `df` is annotated as Any instead of pd.DataFrame.
class ColumnMatchingPlugin:
    # With the Any annotation, the report states that registration succeeds.
    @kernel_function(name="GetColumnNames", description="Get the list of column names in a dataframe")
    async def get_dataframe_columnnames(self, df: Annotated[Any, 'the dataframe from which to extract the column names from']) -> List[str]:
        # Inside a method body the class namespace is not searched, so this
        # name resolves to the module-level function, not to this method.
        return get_dataframe_columnnames(df)

    
    # `df` carries no type annotation in this method.
    @kernel_function(name="GetColumnValues", description="Get the list of values under a column ")
    async def get_distinct_column_values(self, df, column_name: str) -> List[str]:
        # Resolves to the module-level function of the same name (see above).
        return get_distinct_column_values(df, column_name)

## checking why plugin failed?

from semantic_kernel import Kernel

# Same registration steps as before, now using the Any-annotated plugin.
kernel = Kernel()
plugin = ColumnMatchingPlugin()
# According to the report, this call succeeds with the Any annotation.
kernel.add_plugin(plugin, plugin_name="column_matching_plugin")

It gets registered, but in another plugin the functions need to take and return dataframes:

# create the plugin for the data extraction agent

class DataExtractionPlugin:
    """Kernel plugin for the data extraction agent.

    Each method is an async wrapper, decorated with ``@kernel_function``,
    around a module-level helper. The helpers (``json_to_dataframe``,
    ``xml_to_dataframe``, ``csv_to_dataframe``, ``filter_dataframe`` and
    ``dataframe_to_string``) are not shown here and are presumably defined
    elsewhere in the module.

    The ``pd.DataFrame`` parameter and return annotations are intentional:
    they are the typing this feature request asks the kernel to support.
    """

    # Takes a file path and returns the helper's result, annotated as a dataframe.
    @kernel_function(name="ConvertJson", description="Extract the data from the JSON file in form of a dataframe") # json to dataframe
    async def convert_json(self, filepath: str) -> pd.DataFrame:
        return json_to_dataframe(filepath=filepath)

    # Takes a file path and returns the helper's result, annotated as a dataframe.
    @kernel_function(name="ConvertXML", description="Extract the data from the XML file in form of a dataframe") # xml to dataframe
    async def convert_xml(self, filepath: str) -> pd.DataFrame:
        return xml_to_dataframe(filepath=filepath)

    # Takes a file path and returns the helper's result, annotated as a dataframe.
    @kernel_function(name="ConvertCSV", description="Extract the data from the CSV file in form of a dataframe") # csv to dataframe
    async def convert_csv(self, filepath: str) -> pd.DataFrame:
        return csv_to_dataframe(filepath=filepath)

    # `self` was missing from the original signature, so calling the method on
    # an instance bound the plugin instance to `df`.
    @kernel_function(name="FilterDataframe", description="Find the files relevant to the query") # query the dataframe
    async def filter_dataframe(self, df: pd.DataFrame, filter_dict: dict) -> pd.DataFrame:
        # Inside a method body the class namespace is not searched, so this
        # name resolves to the module-level helper, not to this method.
        return filter_dataframe(df=df, filter_dict=filter_dict)

    # The description previously duplicated the FilterDataframe text; it now
    # states what this function does (dataframe in, string out).
    @kernel_function(name="ConvertDataframe", description="Convert the dataframe to a string") # dataframe to string
    async def dataframe_to_string(self, df: pd.DataFrame) -> str:
        # Resolves to the module-level helper of the same name.
        return dataframe_to_string(df=df)

The task is to return a dataframe so that all the dataframes can be combined and analysed. Being able to parse the pd.DataFrame type would give more structured output and better handling of dataframes than using Any and annotating the return type.


Metadata

Metadata

Assignees

No one assigned

    Labels

    python (Pull requests for the Python Semantic Kernel), triage

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions