-
Notifications
You must be signed in to change notification settings - Fork 4.3k
Open
Labels
python (Pull requests for the Python Semantic Kernel), triage
Description
name: Adding Introspection of Pandas DataFrame
about: Currently, when a pandas DataFrame type is used in kernel function signatures, registering the plugin with the kernel raises `NameError: name 'weakref' is not defined`. Adding support for introspection of the pandas DataFrame type would allow the kernel and the agents to execute functions that manipulate dataframes, and to return and share results among themselves in a structured fashion.
def get_dataframe_columnnames(df: pd.DataFrame) -> List[str]:
    """Return the column labels of ``df`` as a list.

    Thin wrapper that makes the pandas column listing available to the agent.

    Args:
        df: The dataframe whose column labels are wanted.

    Returns:
        The labels in ``df.columns``, in their existing order.
    """
    return list(df.columns)
def get_distinct_column_values(df: pd.DataFrame, column_name: str) -> List[str]:
    """Return the unique values found in one column of ``df``.

    Thin wrapper that makes the pandas ``unique`` call available to the agent.

    Args:
        df: The dataframe to read from.
        column_name: Label of the column whose distinct values are wanted.

    Returns:
        A list built from ``df[column_name].unique()``.
    """
    return list(df[column_name].unique())
class ColumnMatchingPlugin:
    """Plugin exposing the dataframe column helpers as kernel functions."""

    @kernel_function(name="GetColumnNames", description="Get the list of column names in a dataframe")
    async def get_dataframe_columnnames(
        self,
        df: Annotated[pd.DataFrame, 'the dataframe from which to extract the column names from'],
    ) -> List[str]:
        """Return the column names of ``df``.

        NOTE: the ``pd.DataFrame`` annotation is kept on purpose; it is the
        annotation reported to raise ``NameError: name 'weakref' is not
        defined`` when this plugin is registered with the kernel.
        """
        # Inside a method body the bare name resolves to the module-level
        # helper of the same name, not to this method.
        return get_dataframe_columnnames(df)

    @kernel_function(name="GetColumnValues", description="Get the list of values under a column ")
    async def get_distinct_column_values(self, df, column_name: str) -> List[str]:
        """Return the distinct values in column ``column_name`` of ``df``."""
        # Delegates to the module-level helper of the same name.
        return get_distinct_column_values(df, column_name)
# Reproduction: register the plugin with a fresh kernel to check why it fails.
from semantic_kernel import Kernel
kernel = Kernel()
plugin = ColumnMatchingPlugin()
kernel.add_plugin(plugin, plugin_name="column_matching_plugin")
This gives the following error:
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[73], line 7
5 kernel = Kernel()
6 plugin = ColumnMatchingPlugin()
----> 7 kernel.add_plugin(plugin, plugin_name="column_matching_plugin")
File /anaconda/envs/azureml_py38/lib/python3.10/site-packages/semantic_kernel/functions/kernel_function_extension.py:109, in KernelFunctionExtension.add_plugin(self, plugin, plugin_name, parent_directory, description, class_init_arguments, encoding)
107 raise TypeError("plugin_name must be a string.")
108 if plugin:
--> 109 self.plugins[plugin_name] = KernelPlugin.from_object(
110 plugin_name=plugin_name, plugin_instance=plugin, description=description
111 )
112 if isinstance(plugin, AddToKernelCallbackProtocol):
113 plugin.added_to_kernel(self) # type: ignore
File /anaconda/envs/azureml_py38/lib/python3.10/site-packages/semantic_kernel/functions/kernel_plugin.py:243, in KernelPlugin.from_object(cls, plugin_name, plugin_instance, description)
241 candidates.extend(inspect.getmembers(plugin_instance, inspect.iscoroutinefunction)) # type: ignore
242 # Read every method from the plugin instance
--> 243 functions = [
244 KernelFunctionFromMethod(method=candidate, plugin_name=plugin_name)
245 for _, candidate in candidates
246 if hasattr(candidate, "__kernel_function__")
247 ]
248 if not description:
249 description = getattr(plugin_instance, "description", None)
File /anaconda/envs/azureml_py38/lib/python3.10/site-packages/semantic_kernel/functions/kernel_plugin.py:244, in <listcomp>(.0)
241 candidates.extend(inspect.getmembers(plugin_instance, inspect.iscoroutinefunction)) # type: ignore
242 # Read every method from the plugin instance
243 functions = [
--> 244 KernelFunctionFromMethod(method=candidate, plugin_name=plugin_name)
245 for _, candidate in candidates
246 if hasattr(candidate, "__kernel_function__")
247 ]
248 if not description:
249 description = getattr(plugin_instance, "description", None)
File /anaconda/envs/azureml_py38/lib/python3.10/site-packages/semantic_kernel/functions/kernel_function_from_method.py:57, in KernelFunctionFromMethod.__init__(self, method, plugin_name, stream_method, parameters, return_parameter, additional_metadata)
55 description = method.__kernel_function_description__ # type: ignore
56 if parameters is None:
---> 57 parameters = [KernelParameterMetadata(**param) for param in method.__kernel_function_parameters__] # type: ignore
58 if return_parameter is None:
59 return_parameter = KernelParameterMetadata(
60 name="return",
61 description=method.__kernel_function_return_description__, # type: ignore
(...)
65 is_required=method.__kernel_function_return_required__, # type: ignore
66 )
File /anaconda/envs/azureml_py38/lib/python3.10/site-packages/semantic_kernel/functions/kernel_function_from_method.py:57, in <listcomp>(.0)
55 description = method.__kernel_function_description__ # type: ignore
56 if parameters is None:
---> 57 parameters = [KernelParameterMetadata(**param) for param in method.__kernel_function_parameters__] # type: ignore
58 if return_parameter is None:
59 return_parameter = KernelParameterMetadata(
60 name="return",
61 description=method.__kernel_function_return_description__, # type: ignore
(...)
65 is_required=method.__kernel_function_return_required__, # type: ignore
66 )
[... skipping hidden 1 frame]
File /anaconda/envs/azureml_py38/lib/python3.10/site-packages/semantic_kernel/functions/kernel_parameter_metadata.py:33, in KernelParameterMetadata.form_schema(cls, data)
31 default_value = data.get("default_value", None)
32 description = data.get("description", None)
---> 33 inferred_schema = cls.infer_schema(type_object, type_, default_value, description)
34 data["schema_data"] = inferred_schema
35 return data
File /anaconda/envs/azureml_py38/lib/python3.10/site-packages/semantic_kernel/functions/kernel_parameter_metadata.py:50, in KernelParameterMetadata.infer_schema(cls, type_object, parameter_type, default_value, description, structured_output)
47 schema = None
49 if type_object is not None:
---> 50 schema = KernelJsonSchemaBuilder.build(type_object, description, structured_output)
51 elif parameter_type is not None:
52 string_default = str(default_value) if default_value is not None else None
File /anaconda/envs/azureml_py38/lib/python3.10/site-packages/semantic_kernel/schema/kernel_json_schema_builder.py:58, in KernelJsonSchemaBuilder.build(cls, parameter_type, description, structured_output)
56 return cls.build_enum_schema(parameter_type, description)
57 if hasattr(parameter_type, "__annotations__"):
---> 58 return cls.build_model_schema(parameter_type, description, structured_output)
59 if hasattr(parameter_type, "__args__"):
60 return cls.handle_complex_type(parameter_type, description, structured_output)
File /anaconda/envs/azureml_py38/lib/python3.10/site-packages/semantic_kernel/schema/kernel_json_schema_builder.py:86, in KernelJsonSchemaBuilder.build_model_schema(cls, model, description, structured_output)
83 required = []
85 model_module_globals = vars(sys.modules[model.__module__])
---> 86 hints = get_type_hints(model, globalns=model_module_globals, localns={})
88 for field_name, field_type in hints.items():
89 field_description = None
File /anaconda/envs/azureml_py38/lib/python3.10/typing.py:1833, in get_type_hints(obj, globalns, localns, include_extras)
1831 if isinstance(value, str):
1832 value = ForwardRef(value, is_argument=False, is_class=True)
-> 1833 value = _eval_type(value, base_globals, base_locals)
1834 hints[name] = value
1835 return hints if include_extras else {k: _strip_annotations(t) for k, t in hints.items()}
File /anaconda/envs/azureml_py38/lib/python3.10/typing.py:327, in _eval_type(t, globalns, localns, recursive_guard)
321 """Evaluate all forward references in the given type t.
322 For use of globalns and localns see the docstring for get_type_hints().
323 recursive_guard is used to prevent infinite recursion with a recursive
324 ForwardRef.
325 """
326 if isinstance(t, ForwardRef):
--> 327 return t._evaluate(globalns, localns, recursive_guard)
328 if isinstance(t, (_GenericAlias, GenericAlias, types.UnionType)):
329 ev_args = tuple(_eval_type(a, globalns, localns, recursive_guard) for a in t.__args__)
File /anaconda/envs/azureml_py38/lib/python3.10/typing.py:694, in ForwardRef._evaluate(self, globalns, localns, recursive_guard)
689 if self.__forward_module__ is not None:
690 globalns = getattr(
691 sys.modules.get(self.__forward_module__, None), '__dict__', globalns
692 )
693 type_ = _type_check(
--> 694 eval(self.__forward_code__, globalns, localns),
695 "Forward references must evaluate to types.",
696 is_argument=self.__forward_is_argument__,
697 allow_special_forms=self.__forward_is_class__,
698 )
699 self.__forward_value__ = _eval_type(
700 type_, globalns, localns, recursive_guard | {self.__forward_arg__}
701 )
702 self.__forward_evaluated__ = True
File <string>:1
NameError: name 'weakref' is not defined
When I change the `df` annotation from `pd.DataFrame` to `Any`:
from typing import Any
class ColumnMatchingPlugin:
    """Plugin exposing the dataframe column helpers as kernel functions.

    This variant annotates ``df`` as ``Any`` instead of ``pd.DataFrame``.
    """

    @kernel_function(name="GetColumnNames", description="Get the list of column names in a dataframe")
    async def get_dataframe_columnnames(
        self,
        df: Annotated[Any, 'the dataframe from which to extract the column names from'],
    ) -> List[str]:
        """Return the column names of ``df``."""
        # Inside a method body the bare name resolves to the module-level
        # helper of the same name, not to this method.
        return get_dataframe_columnnames(df)

    @kernel_function(name="GetColumnValues", description="Get the list of values under a column ")
    async def get_distinct_column_values(self, df, column_name: str) -> List[str]:
        """Return the distinct values in column ``column_name`` of ``df``."""
        # Delegates to the module-level helper of the same name.
        return get_distinct_column_values(df, column_name)
# Register the plugin again, this time with the Any-annotated parameter.
from semantic_kernel import Kernel
kernel = Kernel()
plugin = ColumnMatchingPlugin()
kernel.add_plugin(plugin, plugin_name="column_matching_plugin")
With this change the plugin gets registered. However, in another plugin the kernel functions need to take and return `pd.DataFrame` values:
# create the plugin for the data extraction agent
class DataExtractionPlugin:
    """Plugin exposing file-to-dataframe converters and dataframe utilities.

    Each method delegates to a module-level helper of the same purpose
    (``json_to_dataframe``, ``xml_to_dataframe``, ``csv_to_dataframe``,
    ``filter_dataframe``, ``dataframe_to_string``).
    """

    @kernel_function(name="ConvertJson", description="Extract the data from the JSON file in form of a dataframe")
    async def convert_json(self, filepath: str) -> pd.DataFrame:
        """Load the JSON file at ``filepath`` as a dataframe."""
        return json_to_dataframe(filepath=filepath)

    @kernel_function(name="ConvertXML", description="Extract the data from the XML file in form of a dataframe")
    async def convert_xml(self, filepath: str) -> pd.DataFrame:
        """Load the XML file at ``filepath`` as a dataframe."""
        return xml_to_dataframe(filepath=filepath)

    @kernel_function(name="ConvertCSV", description="Extract the data from the CSV file in form of a dataframe")
    async def convert_csv(self, filepath: str) -> pd.DataFrame:
        """Load the CSV file at ``filepath`` as a dataframe."""
        return csv_to_dataframe(filepath=filepath)

    @kernel_function(name="FilterDataframe", description="Find the files relevant to the query")
    async def filter_dataframe(self, df: pd.DataFrame, filter_dict: dict) -> pd.DataFrame:
        """Query ``df`` using ``filter_dict`` and return the result.

        ``self`` was missing from the original signature, so a call on a
        plugin instance would have bound the instance itself to ``df``.
        """
        # The bare name resolves to the module-level helper, not this method.
        return filter_dataframe(df=df, filter_dict=filter_dict)

    # The description previously duplicated the FilterDataframe text; it now
    # states what this function does (dataframe to string).
    @kernel_function(name="ConvertDataframe", description="Convert the dataframe to a string")
    async def dataframe_to_string(self, df: pd.DataFrame) -> str:
        """Return ``df`` rendered as a string."""
        # The bare name resolves to the module-level helper, not this method.
        return dataframe_to_string(df=df)
The task is to return a dataframe, so that all the dataframes can be combined and analysed. Being able to parse the `pd.DataFrame` type would give more structured output and better handling of dataframes than using `Any` and annotating the return type.
Metadata
Metadata
Assignees
Labels
python (Pull requests for the Python Semantic Kernel), triage