diff --git a/.github/workflows/run-forecast-explainer-tests.yml b/.github/workflows/run-forecast-explainer-tests.yml
index fc03691b0..7b514c217 100644
--- a/.github/workflows/run-forecast-explainer-tests.yml
+++ b/.github/workflows/run-forecast-explainer-tests.yml
@@ -46,6 +46,12 @@ jobs:
- uses: ./.github/workflows/set-dummy-conf
name: "Test config setup"
+ - name: Free up disk space
+ run: |
+ sudo apt-get clean
+ sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc
+ df -h
+
- name: "Run Forecast Explainer Tests"
timeout-minutes: 180
shell: bash
diff --git a/ads/automl/provider.py b/ads/automl/provider.py
index 729d4fba1..3575000f8 100644
--- a/ads/automl/provider.py
+++ b/ads/automl/provider.py
@@ -1,38 +1,33 @@
#!/usr/bin/env python
-# -*- coding: utf-8; -*-
-# Copyright (c) 2020, 2023 Oracle and/or its affiliates.
+# Copyright (c) 2020, 2025 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
import logging
-import time
import sys
+import time
import warnings
-from abc import ABC, abstractmethod, abstractproperty
-import math
-import pandas as pd
+from abc import ABC, abstractmethod
+
+import matplotlib.pyplot as plt
import numpy as np
+import pandas as pd
from sklearn import set_config
from sklearn.dummy import DummyClassifier, DummyRegressor
-import matplotlib.pyplot as plt
-
import ads
+from ads.common import logger, utils
+from ads.common.decorator.deprecate import deprecated
+from ads.common.decorator.runtime_dependency import (
+ OptionalDependency,
+ runtime_dependency,
+)
from ads.common.utils import (
+ is_notebook,
ml_task_types,
wrap_lines,
- is_documentation_mode,
- is_notebook,
)
-from ads.common.decorator.runtime_dependency import (
- runtime_dependency,
- OptionalDependency,
-)
-from ads.common.decorator.deprecate import deprecated
from ads.dataset.label_encoder import DataFrameLabelEncoder
-from ads.dataset.helper import is_text_data
-
-from ads.common import logger, utils
class AutoMLProvider(ABC):
@@ -141,7 +136,7 @@ def get_transformer_pipeline(self):
pass
-class BaselineModel(object):
+class BaselineModel:
"""
A BaselineModel object that supports fit/predict/predict_proba/transform
interface. Labels (y) are encoded using DataFrameLabelEncoder.
@@ -156,7 +151,6 @@ def __init__(self, est):
self.df_label_encoder = DataFrameLabelEncoder()
def predict(self, X):
-
"""
Runs the Baselines predict function and returns the result.
@@ -174,7 +168,6 @@ def predict(self, X):
return self.est.predict(X)
def predict_proba(self, X):
-
"""
Runs the Baselines predict_proba function and returns the result.
@@ -192,7 +185,6 @@ def predict_proba(self, X):
return self.est.predict_proba(X)
def fit(self, X, y):
-
"""
Fits the baseline estimator.
@@ -213,7 +205,6 @@ def fit(self, X, y):
return self
def transform(self, X):
-
"""
Runs the Baselines transform function and returns the result.
@@ -304,16 +295,15 @@ def decide_estimator(self, **kwargs):
"""
if self.est is not None:
return self.est
- else:
- if self.ml_task_type == ml_task_types.REGRESSION:
- return BaselineModel(DummyRegressor())
- elif self.ml_task_type in [
- ml_task_types.BINARY_CLASSIFICATION,
- ml_task_types.MULTI_CLASS_CLASSIFICATION,
- ml_task_types.BINARY_TEXT_CLASSIFICATION,
- ml_task_types.MULTI_CLASS_TEXT_CLASSIFICATION,
- ]:
- return BaselineModel(DummyClassifier())
+ elif self.ml_task_type == ml_task_types.REGRESSION:
+ return BaselineModel(DummyRegressor())
+ elif self.ml_task_type in [
+ ml_task_types.BINARY_CLASSIFICATION,
+ ml_task_types.MULTI_CLASS_CLASSIFICATION,
+ ml_task_types.BINARY_TEXT_CLASSIFICATION,
+ ml_task_types.MULTI_CLASS_TEXT_CLASSIFICATION,
+ ]:
+ return BaselineModel(DummyClassifier())
# An installation of oracle labs automl is required only for this class
@@ -483,8 +473,11 @@ def print_summary(
0, "Rank based on Performance", np.arange(2, len(sorted_summary_df) + 2)
)
- from IPython.core.display import display, HTML
+ from IPython.display import HTML
+ from ads.common.utils import get_display
+
+ display = get_display()
with pd.option_context(
"display.max_colwidth",
1000,
@@ -595,9 +588,7 @@ def _decide_estimator(self, **kwargs):
if (
self.ml_task_type == ml_task_types.BINARY_CLASSIFICATION
or self.ml_task_type == ml_task_types.BINARY_TEXT_CLASSIFICATION
- ):
- test_model_list = ["LogisticRegression"]
- elif (
+ ) or (
self.ml_task_type == ml_task_types.MULTI_CLASS_CLASSIFICATION
or self.ml_task_type == ml_task_types.MULTI_CLASS_TEXT_CLASSIFICATION
):
@@ -712,7 +703,7 @@ def visualize_algorithm_selection_trials(self, ylabel=None):
for f in mean_scores_ser.keys():
se = scipy.stats.sem(scores_ser[f], ddof=1)
y_error.append(se)
- if f == "{}_AS".format(self.est.selected_model_):
+ if f == f"{self.est.selected_model_}_AS":
colors.append("orange")
elif mean_scores_ser[f] >= mean_scores_ser.mean():
colors.append("teal")
@@ -741,7 +732,7 @@ def visualize_adaptive_sampling_trials(self):
_log_visualize_no_trials("adaptive sampling")
return
fig, ax = plt.subplots(1, figsize=(6, 3))
- ax.set_title("Adaptive Sampling ({})".format(trials[0][0]))
+ ax.set_title(f"Adaptive Sampling ({trials[0][0]})")
ax.set_xlabel("Dataset sample size")
ax.set_ylabel(r"Predicted model score")
scores = [
@@ -882,7 +873,7 @@ def visualize_tuning_trials(self, ylabel=None):
plt.show()
-class AutoMLPreprocessingTransformer(object): # pragma: no cover
+class AutoMLPreprocessingTransformer: # pragma: no cover
@deprecated(
details="Working with AutoML has moved from within ADS to working directly with the AutoMLx library. AutoMLx are preinstalled in conda pack automlx_p38_cpu_v2 and later, and can now be updated independently of ADS. AutoMLx documentation may be found at https://docs.oracle.com/en-us/iaas/tools/automlx/latest/html/multiversion/v23.1.1/index.html. Notebook examples are in Oracle's samples repository: https://github.com/oracle-samples/oci-data-science-ai-samples/tree/master/notebook_examples and a migration tutorial can be found at https://accelerated-data-science.readthedocs.io/en/latest/user_guide/model_training/automl/quick_start.html .",
raise_error=True,
@@ -931,7 +922,7 @@ def __repr__(self):
return self.msg
-class AutoMLFeatureSelection(object): # pragma: no cover
+class AutoMLFeatureSelection: # pragma: no cover
@deprecated(
details="Working with AutoML has moved from within ADS to working directly with the AutoMLx library. AutoMLx are preinstalled in conda pack automlx_p38_cpu_v2 and later, and can now be updated independently of ADS. AutoMLx documentation may be found at https://docs.oracle.com/en-us/iaas/tools/automlx/latest/html/multiversion/v23.1.1/index.html. Notebook examples are in Oracle's samples repository: https://github.com/oracle-samples/oci-data-science-ai-samples/tree/master/notebook_examples and a migration tutorial can be found at https://accelerated-data-science.readthedocs.io/en/latest/user_guide/model_training/automl/quick_start.html .",
raise_error=True,
diff --git a/ads/catalog/model.py b/ads/catalog/model.py
index 9540416d2..4f1c7e32c 100644
--- a/ads/catalog/model.py
+++ b/ads/catalog/model.py
@@ -1,7 +1,6 @@
#!/usr/bin/env python
-# -*- coding: utf-8; -*-
-# Copyright (c) 2020, 2024 Oracle and/or its affiliates.
+# Copyright (c) 2020, 2025 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
import warnings
@@ -27,20 +26,30 @@
import pandas as pd
import yaml
+from oci.data_science.data_science_client import DataScienceClient
+from oci.data_science.models import (
+ ArtifactExportDetailsObjectStorage,
+ ArtifactImportDetailsObjectStorage,
+ CreateModelDetails,
+ ExportModelArtifactDetails,
+ ImportModelArtifactDetails,
+ ModelSummary,
+ WorkRequest,
+)
+from oci.data_science.models import Model as OCIModel
+from oci.data_science.models.model_provenance import ModelProvenance
+from oci.data_science.models.update_model_details import UpdateModelDetails
+from oci.exceptions import ServiceError
+from oci.identity import IdentityClient
+
from ads.catalog.summary import SummaryList
from ads.common import auth, logger, oci_client, utils
from ads.common.decorator.deprecate import deprecated
from ads.common.decorator.runtime_dependency import (
- runtime_dependency,
OptionalDependency,
+ runtime_dependency,
)
from ads.common.model_artifact import ConflictStrategy, ModelArtifact
-from ads.model.model_metadata import (
- METADATA_SIZE_LIMIT,
- MetadataSizeTooLarge,
- ModelCustomMetadata,
- ModelTaxonomyMetadata,
-)
from ads.common.object_storage_details import ObjectStorageDetails
from ads.common.oci_resource import SEARCH_TYPE, OCIResource
from ads.config import (
@@ -51,22 +60,14 @@
)
from ads.dataset.progress import TqdmProgressBar
from ads.feature_engineering.schema import Schema
-from ads.model.model_version_set import ModelVersionSet, _extract_model_version_set_id
from ads.model.deployment.model_deployer import ModelDeployer
-from oci.data_science.data_science_client import DataScienceClient
-from oci.data_science.models import (
- ArtifactExportDetailsObjectStorage,
- ArtifactImportDetailsObjectStorage,
- CreateModelDetails,
- ExportModelArtifactDetails,
- ImportModelArtifactDetails,
+from ads.model.model_metadata import (
+ METADATA_SIZE_LIMIT,
+ MetadataSizeTooLarge,
+ ModelCustomMetadata,
+ ModelTaxonomyMetadata,
)
-from oci.data_science.models import Model as OCIModel
-from oci.data_science.models import ModelSummary, WorkRequest
-from oci.data_science.models.model_provenance import ModelProvenance
-from oci.data_science.models.update_model_details import UpdateModelDetails
-from oci.exceptions import ServiceError
-from oci.identity import IdentityClient
+from ads.model.model_version_set import ModelVersionSet, _extract_model_version_set_id
_UPDATE_MODEL_DETAILS_ATTRIBUTES = [
"display_name",
@@ -391,8 +392,9 @@ def show_in_notebook(self, display_format: str = "dataframe") -> None:
Nothing.
"""
if display_format == "dataframe":
- from IPython.core.display import display
+ from ads.common.utils import get_display
+ display = get_display()
display(self.to_dataframe())
elif display_format == "yaml":
print(self._to_yaml())
@@ -454,9 +456,9 @@ def commit(self, force: bool = True) -> None:
if hasattr(self, "metadata_custom"):
attributes["custom_metadata_list"] = self.metadata_custom._to_oci_metadata()
if hasattr(self, "metadata_taxonomy"):
- attributes[
- "defined_metadata_list"
- ] = self.metadata_taxonomy._to_oci_metadata()
+ attributes["defined_metadata_list"] = (
+ self.metadata_taxonomy._to_oci_metadata()
+ )
update_model_details = UpdateModelDetails(**attributes)
# freeform_tags=self._model.freeform_tags, defined_tags=self._model.defined_tags)
@@ -558,7 +560,7 @@ def load_model(
try:
provenance_response = cls._get_provenance_metadata(ds_client, model_id)
- except Exception as e:
+ except Exception:
raise ValueError(
f"Unable to fetch model provenance metadata for model {model_id}"
)
@@ -1071,7 +1073,7 @@ def _download_large_artifact(
None
Nothing.
"""
- progress.update(f"Importing model artifacts from model catalog")
+ progress.update("Importing model artifacts from model catalog")
self._import_model_artifact(model_id=model_id, bucket_uri=bucket_uri)
progress.update("Copying model artifacts to the artifact directory")
@@ -1360,7 +1362,7 @@ def upload_model(
raise ValueError("project_id needs to be specified.")
schema_file = os.path.join(model_artifact.artifact_dir, "schema.json")
if os.path.exists(schema_file):
- with open(schema_file, "r") as schema:
+ with open(schema_file) as schema:
metadata = json.load(schema)
freeform_tags = {"problem_type": metadata["problem_type"]}
@@ -1475,7 +1477,7 @@ def _export_model_artifact(
3. Exports artifact from the user's object storage bucket to the system one.
"""
artifact_zip_path = self._prepare_model_artifact(model_artifact, progress)
- progress.update(f"Copying model artifact to the Object Storage bucket")
+ progress.update("Copying model artifact to the Object Storage bucket")
try:
bucket_uri_file_name = os.path.basename(bucket_uri)
diff --git a/ads/catalog/notebook.py b/ads/catalog/notebook.py
index c8ac60850..38a61b710 100644
--- a/ads/catalog/notebook.py
+++ b/ads/catalog/notebook.py
@@ -1,7 +1,6 @@
#!/usr/bin/env python
-# -*- coding: utf-8; -*-
-# Copyright (c) 2020, 2024 Oracle and/or its affiliates.
+# Copyright (c) 2020, 2025 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
import warnings
@@ -14,31 +13,31 @@
stacklevel=2,
)
-from pandas import DataFrame
-import oci
+from types import MethodType
+
from oci.data_science.models import (
- NotebookSessionSummary,
- UpdateNotebookSessionDetails,
CreateNotebookSessionDetails,
NotebookSession,
NotebookSessionConfigurationDetails,
+ NotebookSessionSummary,
+ UpdateNotebookSessionDetails,
)
from oci.exceptions import ServiceError
-from types import MethodType
+from pandas import DataFrame
from ads.catalog.summary import SummaryList
+from ads.common import auth as authutil
+from ads.common import oci_client as oc
from ads.common import utils
from ads.common.decorator.runtime_dependency import (
- runtime_dependency,
OptionalDependency,
+ runtime_dependency,
)
-from ads.common import auth as authutil
-from ads.common import oci_client as oc
from ads.config import (
- OCI_IDENTITY_SERVICE_ENDPOINT,
NB_SESSION_COMPARTMENT_OCID,
- PROJECT_OCID,
+ OCI_IDENTITY_SERVICE_ENDPOINT,
OCI_ODSC_SERVICE_ENDPOINT,
+ PROJECT_OCID,
)
create_notebook_details_attributes = CreateNotebookSessionDetails().swagger_types.keys()
@@ -210,8 +209,9 @@ def show_in_notebook(notebook_self):
"""
Describe the project by showing it's properties
"""
- from IPython.core.display import display
+ from ads.common.utils import get_display
+ display = get_display()
display(notebook_self)
def _repr_html_(notebook_self):
diff --git a/ads/catalog/project.py b/ads/catalog/project.py
index 81f24fe82..05857e607 100644
--- a/ads/catalog/project.py
+++ b/ads/catalog/project.py
@@ -1,7 +1,6 @@
#!/usr/bin/env python
-# -*- coding: utf-8; -*-
-# Copyright (c) 2020, 2024 Oracle and/or its affiliates.
+# Copyright (c) 2020, 2025 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
import warnings
@@ -14,30 +13,29 @@
stacklevel=2,
)
-from ads.catalog.summary import SummaryList
-from ads.common import oci_client, auth
-from ads.common import utils
-from ads.common.decorator.runtime_dependency import (
- runtime_dependency,
- OptionalDependency,
-)
-from ads.config import (
- OCI_ODSC_SERVICE_ENDPOINT,
- OCI_IDENTITY_SERVICE_ENDPOINT,
- NB_SESSION_COMPARTMENT_OCID,
-)
from collections.abc import Mapping
-from oci.config import from_file
+from types import MethodType
+
from oci.data_science.models import (
+ CreateProjectDetails,
Project,
ProjectSummary,
- CreateProjectDetails,
UpdateProjectDetails,
)
from oci.exceptions import ServiceError
from pandas import DataFrame
-from types import MethodType
+from ads.catalog.summary import SummaryList
+from ads.common import auth, oci_client, utils
+from ads.common.decorator.runtime_dependency import (
+ OptionalDependency,
+ runtime_dependency,
+)
+from ads.config import (
+ NB_SESSION_COMPARTMENT_OCID,
+ OCI_IDENTITY_SERVICE_ENDPOINT,
+ OCI_ODSC_SERVICE_ENDPOINT,
+)
create_project_details_attributes = CreateProjectDetails().swagger_types.keys()
update_project_details_attributes = UpdateProjectDetails().swagger_types.keys()
@@ -229,8 +227,9 @@ def show_in_notebook(project_self):
"""
Describe the project by showing it's properties
"""
- from IPython.core.display import display
+ from ads.common.utils import get_display
+ display = get_display()
display(project_self)
def _repr_html_(project_self):
diff --git a/ads/catalog/summary.py b/ads/catalog/summary.py
index 9072fc940..04922974d 100644
--- a/ads/catalog/summary.py
+++ b/ads/catalog/summary.py
@@ -1,10 +1,8 @@
#!/usr/bin/env python
-# -*- coding: utf-8 -*--
-# Copyright (c) 2020, 2024 Oracle and/or its affiliates.
+# Copyright (c) 2020, 2025 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
-from __future__ import print_function, absolute_import
import warnings
warnings.warn(
@@ -15,14 +13,16 @@
stacklevel=2,
)
import abc
-import ads.common.utils as utils
+from abc import ABCMeta
+
+import pandas as pd
from oci.util import to_dict
from pandas import DataFrame
-import pandas as pd
-from abc import ABCMeta
+
+from ads.common import utils
from ads.common.decorator.runtime_dependency import (
- runtime_dependency,
OptionalDependency,
+ runtime_dependency,
)
@@ -131,8 +131,9 @@ def show_in_notebook(self, datetime_format=None):
-------
None
"""
- from IPython.core.display import display
+ from ads.common.utils import get_display
+ display = get_display()
display(
self.to_dataframe(datetime_format=datetime_format).style.applymap(
self._color_lifecycle_state, subset=["lifecycle_state"]
diff --git a/ads/common/model.py b/ads/common/model.py
index bfee5384f..2783a9d21 100644
--- a/ads/common/model.py
+++ b/ads/common/model.py
@@ -1,38 +1,38 @@
#!/usr/bin/env python
-# -*- coding: utf-8; -*-
-# Copyright (c) 2020, 2022 Oracle and/or its affiliates.
+# Copyright (c) 2020, 2025 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
-from distutils import dir_util
import os
import shutil
from collections.abc import Iterable
+from distutils import dir_util
import numpy as np
import pandas as pd
+from sklearn.pipeline import Pipeline
+
from ads.common import logger, utils
+from ads.common.decorator.deprecate import deprecated
+from ads.common.decorator.runtime_dependency import (
+ OptionalDependency,
+ runtime_dependency,
+)
from ads.common.model_export_util import (
Progress_Steps_W_Fn,
Progress_Steps_Wo_Fn,
prepare_generic_model,
serialize_model,
)
-from ads.model.transformer.onnx_transformer import ONNXTransformer
-from ads.common.decorator.runtime_dependency import (
- runtime_dependency,
- OptionalDependency,
-)
-from ads.common.decorator.deprecate import deprecated
from ads.common.utils import is_notebook
from ads.dataset.pipeline import TransformerPipeline
-from sklearn.pipeline import Pipeline
+from ads.model.transformer.onnx_transformer import ONNXTransformer
Unsupported_Model_Types = []
NoTransformModels = ["torch", "tensorflow", "keras", "automl"]
-class ADSModel(object):
+class ADSModel:
def __init__(
self,
est,
@@ -107,11 +107,7 @@ def from_estimator(est, transformers=None, classes=None, name=None):
>>> model = MyModelClass.train()
>>> model_ads = from_estimator(model)
"""
- if hasattr(est, "predict"):
- return ADSModel(
- est, transformer_pipeline=transformers, classes=classes, name=name
- )
- elif callable(est):
+ if hasattr(est, "predict") or callable(est):
return ADSModel(
est, transformer_pipeline=transformers, classes=classes, name=name
)
@@ -157,7 +153,6 @@ def _get_underlying_model_type(self):
)
else:
self._underlying_model = "Unknown"
- return
def rename(self, name):
"""
@@ -308,7 +303,7 @@ def transform(self, X):
for transformer in transformer_pipeline:
try:
X = transformer.transform(X)
- except Exception as e:
+ except Exception:
pass
# logger.warn("Skipping pre-processing.")
if self.target is not None and self.target in X.columns:
@@ -347,11 +342,9 @@ def feature_names(self, X=None):
return self.est.feature_name()
except AttributeError:
return X.columns
- elif model_type == "tensorflow":
- return []
- elif model_type == "keras":
- return []
- elif model_type == "mxnet":
+ elif (
+ model_type == "tensorflow" or model_type == "keras" or model_type == "mxnet"
+ ):
return []
else:
try:
@@ -640,15 +633,17 @@ def show_in_notebook(self):
"display.precision",
4,
):
- from IPython.core.display import HTML, display
+ from IPython.display import HTML
+
+ from ads.common.utils import get_display
+ display = get_display()
display(HTML(info_df.to_html(index=False, header=False)))
return info
@staticmethod
@runtime_dependency(module="skl2onnx", install_from=OptionalDependency.ONNX)
def get_init_types(df, underlying_model=None):
-
from skl2onnx.common.data_types import FloatTensorType
if underlying_model == "sklearn":
diff --git a/ads/common/utils.py b/ads/common/utils.py
index e0226739d..712321a77 100644
--- a/ads/common/utils.py
+++ b/ads/common/utils.py
@@ -546,7 +546,7 @@ def print_user_message(
else:
user_message = "{}".format(msg.strip().replace("\n", "
"))
- from IPython.core.display import HTML, display
+ from IPython.display import HTML, display
display(
HTML(
@@ -827,6 +827,8 @@ def get_sqlalchemy_engine(connection_url, *args, **kwargs):
The engine from which SqlAlchemny commands can be ran on
"""
global _engines
+ import sqlalchemy
+
if connection_url not in _engines:
#
# Note: pool_recycle=1 is used here because sqlalchemy is free to drop inactive
@@ -1848,3 +1850,24 @@ def parse_content_disposition(header: str) -> Tuple[str, Dict[str, str]]:
key, value = part.split("=", 1)
params[key.strip().lower()] = value.strip().strip('"')
return disposition, params
+
+
+def get_display():
+    """
+    Return ``IPython.display.display`` if importable; otherwise a no-op.
+
+    Centralizes the optional IPython import for all callers. Usage:
+        from ads.common.utils import get_display
+        display = get_display()
+        display(obj)
+    """
+    try:
+        from IPython.display import display
+
+        return display
+    except ImportError:
+        # ImportError (not just ModuleNotFoundError) also covers a broken install.
+        def _noop(*args, **kwargs):
+            return None
+
+        return _noop
diff --git a/ads/data_labeling/mixin/data_labeling.py b/ads/data_labeling/mixin/data_labeling.py
index e2c65eb20..ee1dc7583 100644
--- a/ads/data_labeling/mixin/data_labeling.py
+++ b/ads/data_labeling/mixin/data_labeling.py
@@ -1,17 +1,17 @@
#!/usr/bin/env python
-# -*- coding: utf-8; -*-
-# Copyright (c) 2021, 2023 Oracle and/or its affiliates.
+# Copyright (c) 2021, 2025 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
from typing import Dict, List
+
from ads.common import auth as authutil
-from ads.data_labeling.reader.dataset_reader import LabeledDatasetReader
-from ads.data_labeling.visualizer import image_visualizer, text_visualizer
from ads.common.decorator.runtime_dependency import (
- runtime_dependency,
OptionalDependency,
+ runtime_dependency,
)
+from ads.data_labeling.reader.dataset_reader import LabeledDatasetReader
+from ads.data_labeling.visualizer import image_visualizer, text_visualizer
ROWS_TO_RENDER_LIMIT = 50
@@ -227,6 +227,9 @@ def render_ner(
if return_html:
return result_html
- from IPython.core.display import HTML, Markdown, display
+ from IPython.display import Markdown
+
+ from ads.common.utils import get_display
+ display = get_display()
display(Markdown(result_html))
diff --git a/ads/dataset/dataset.py b/ads/dataset/dataset.py
index 025667188..59f2a6e41 100644
--- a/ads/dataset/dataset.py
+++ b/ads/dataset/dataset.py
@@ -1,37 +1,47 @@
#!/usr/bin/env python
-# -*- coding: utf-8 -*--
-# Copyright (c) 2020, 2024 Oracle and/or its affiliates.
+# Copyright (c) 2020, 2025 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
-from __future__ import print_function, absolute_import, division
import copy
import datetime
-import fsspec
-import numpy as np
import os
-import pandas as pd
import uuid
-
from collections import Counter
+from typing import Iterable, Union
+
+import fsspec
+import numpy as np
+import pandas as pd
from sklearn.preprocessing import FunctionTransformer
-from typing import Iterable, Tuple, Union
from ads import set_documentation_mode
from ads.common import utils
from ads.common.decorator.deprecate import deprecated
+from ads.common.decorator.runtime_dependency import (
+ OptionalDependency,
+ runtime_dependency,
+)
from ads.dataset import helper, logger
+from ads.dataset.correlation import (
+ _cat_vs_cat,
+ _cat_vs_cts,
+ _get_columns_by_type,
+ _validate_correlation_methods,
+)
+from ads.dataset.correlation_plot import plot_correlation_heatmap
from ads.dataset.dataframe_transformer import DataFrameTransformer
from ads.dataset.exception import ValidationError
from ads.dataset.helper import (
- convert_columns,
- fix_column_names,
- generate_sample,
DatasetDefaults,
+ convert_columns,
deprecate_default_value,
deprecate_variable,
+ fix_column_names,
+ generate_sample,
get_dataset,
+ get_feature_type,
infer_target_type,
)
from ads.dataset.label_encoder import DataFrameLabelEncoder
@@ -39,18 +49,6 @@
from ads.dataset.progress import DummyProgressBar
from ads.dataset.sampled_dataset import PandasDataset
from ads.type_discovery.type_discovery_driver import TypeDiscoveryDriver
-from ads.dataset.helper import get_feature_type
-from ads.dataset.correlation_plot import plot_correlation_heatmap
-from ads.dataset.correlation import (
- _cat_vs_cts,
- _cat_vs_cat,
- _get_columns_by_type,
- _validate_correlation_methods,
-)
-from ads.common.decorator.runtime_dependency import (
- runtime_dependency,
- OptionalDependency,
-)
N_Features_Wide_Dataset = 64
@@ -194,8 +192,11 @@ def compute(self):
)
@runtime_dependency(module="IPython", install_from=OptionalDependency.NOTEBOOK)
def _repr_html_(self):
- from IPython.core.display import display, HTML
+ from IPython.display import HTML
+ from ads.common.utils import get_display
+
+ display = get_display()
display(
HTML(
utils.horizontal_scrollable_div(
@@ -254,8 +255,11 @@ def _constructor(self):
module="IPython", install_from=OptionalDependency.NOTEBOOK
)
def _repr_html_(self):
- from IPython.core.display import display, HTML
+ from IPython.display import HTML
+
+ from ads.common.utils import get_display
+ display = get_display()
display(
HTML(
utils.horizontal_scrollable_div(
@@ -266,7 +270,6 @@ def _repr_html_(self):
)
)
)
- return None
def __repr__(self):
return "{} rows, {} columns".format(*self.shape)
@@ -838,7 +841,7 @@ def snapshot(self, snapshot_dir=None, name="", storage_options=None):
>>> ds_uri = ds.snapshot()
"""
if snapshot_dir is None:
- import ads.dataset.factory as factory
+ from ads.dataset import factory
snapshot_dir = factory.default_snapshots_dir
if snapshot_dir is None:
@@ -854,7 +857,7 @@ def snapshot(self, snapshot_dir=None, name="", storage_options=None):
parquet_file = "%s%s.parquet" % (snapshot_dir, name)
os.makedirs(snapshot_dir, exist_ok=True)
if storage_options is None and parquet_file[:3] == "oci":
- import ads.dataset.factory as factory
+ from ads.dataset import factory
storage_options = factory.default_storage_options
logger.info("Using default storage options.")
@@ -891,7 +894,7 @@ def to_csv(self, path, storage_options=None, **kwargs):
>>> [ds_link] = ds.to_csv("my/path.csv")
"""
if storage_options is None:
- import ads.dataset.factory as factory
+ from ads.dataset import factory
storage_options = factory.default_storage_options
logger.info("Using default storage options")
@@ -919,7 +922,7 @@ def to_parquet(self, path, storage_options=None, **kwargs):
>>> ds.to_parquet("my/path")
"""
if storage_options is None:
- import ads.dataset.factory as factory
+ from ads.dataset import factory
storage_options = factory.default_storage_options
logger.info("Using default storage options")
@@ -947,7 +950,7 @@ def to_json(self, path, storage_options=None, **kwargs):
>>> ds.to_json("my/path.json")
"""
if storage_options is None:
- import ads.dataset.factory as factory
+ from ads.dataset import factory
storage_options = factory.default_storage_options
logger.info("Using default storage options")
@@ -983,7 +986,7 @@ def to_hdf(
>>> ds.to_hdf(path="my/path.h5", key="df")
"""
if storage_options is None:
- import ads.dataset.factory as factory
+ from ads.dataset import factory
storage_options = factory.default_storage_options
logger.info("Using default storage options")
@@ -1286,9 +1289,8 @@ def _build_new_dataset(
DatasetDefaults.sampling_confidence_interval,
**init_kwargs,
)
- else:
- if progress:
- progress.update()
+ elif progress:
+ progress.update()
shape = (n, len(df.columns))
if not utils.is_same_class(self, ADSDataset) and target is None:
target = self.target.name
@@ -1424,7 +1426,7 @@ def corr(
force_recompute = deprecate_variable(
overwrite,
force_recompute,
- f"overwrite=None is deprecated. Use force_recompute instead.",
+ "overwrite=None is deprecated. Use force_recompute instead.",
DeprecationWarning,
)
if sample_size > 1 or sample_size <= 0:
@@ -1529,20 +1531,19 @@ def _return_correlation(
" `force_recompute=True` to override."
)
return getattr(self, "_" + "_".join(method.split()))
+ elif method == "pearson":
+ self._calc_pearson(corr_df, continuous_columns)
+ return self._pearson
+ elif method == "cramers v":
+ self._calc_cramers_v(corr_df, categorical_columns)
+ return self._cramers_v
+ elif method == "correlation ratio":
+ self._calc_correlation_ratio(
+ corr_df, categorical_columns, continuous_columns
+ )
+ return self._correlation_ratio
else:
- if method == "pearson":
- self._calc_pearson(corr_df, continuous_columns)
- return self._pearson
- elif method == "cramers v":
- self._calc_cramers_v(corr_df, categorical_columns)
- return self._cramers_v
- elif method == "correlation ratio":
- self._calc_correlation_ratio(
- corr_df, categorical_columns, continuous_columns
- )
- return self._correlation_ratio
- else:
- raise ValueError(f"The {method} method is not supported.")
+ raise ValueError(f"The {method} method is not supported.")
@runtime_dependency(module="IPython", install_from=OptionalDependency.NOTEBOOK)
def _reduce_dim_for_wide_dataset(
@@ -1551,8 +1552,11 @@ def _reduce_dim_for_wide_dataset(
min_cores_for_correlation = 2
n_rows, n_columns = self.shape
- from IPython.core.display import display, HTML
+ from IPython.display import HTML
+
+ from ads.common.utils import get_display
+ display = get_display()
if utils.get_cpu_count() <= min_cores_for_correlation:
msg = (
f"Not attempting to calculate correlations, too few cores ({utils.get_cpu_count()}) "
@@ -1695,14 +1699,12 @@ def show_corr(
if correlation_target:
if correlation_target not in features_list:
- raise ValueError(
- "correlation_target has to be in {}.".format(features_list)
- )
+ raise ValueError(f"correlation_target has to be in {features_list}.")
force_recompute = deprecate_variable(
overwrite,
force_recompute,
- f"overwrite=None is deprecated. Use force_recompute instead.",
+ "overwrite=None is deprecated. Use force_recompute instead.",
DeprecationWarning,
)
@@ -1787,7 +1789,7 @@ def show_in_notebook(
html_summary += "
%s" % self.description html_summary += "
Note: Visualizations use a sampled subset of the dataset, this is to improve plotting performance. The sample size is calculated to be statistically - significant within the confidence level: {} and confidence interval: {}. + significant within the confidence level: {DatasetDefaults.sampling_confidence_level} and confidence interval: {DatasetDefaults.sampling_confidence_interval}. - The sampled data has {:,} rows + The sampled data has {sub_samp_df.shape[0]:,} rows
@@ -1818,11 +1820,7 @@ def show_in_notebook( - """.format( - DatasetDefaults.sampling_confidence_level, - DatasetDefaults.sampling_confidence_interval, - sub_samp_df.shape[0], - ) + """ html_summary += "" @@ -1892,7 +1890,7 @@ def show_in_notebook( force_recompute = deprecate_variable( overwrite, force_recompute, - f"overwrite=None is deprecated. Use force_recompute instead.",
+ "overwrite=None is deprecated. Use force_recompute instead.",
DeprecationWarning,
)
plot_type = kwargs.pop("plot_type", "heatmap")
@@ -1909,8 +1907,9 @@ def show_in_notebook(
**kwargs,
)
- from IPython.core.display import display
+ from ads.common.utils import get_display
+ display = get_display()
display(accordion)
# generate html for feature_distribution & warnings
diff --git a/ads/dataset/factory.py b/ads/dataset/factory.py
index c7bfb5139..7d368895e 100644
--- a/ads/dataset/factory.py
+++ b/ads/dataset/factory.py
@@ -1,58 +1,57 @@
#!/usr/bin/env python
-# -*- coding: utf-8; -*-
-# Copyright (c) 2020, 2024 Oracle and/or its affiliates.
+# Copyright (c) 2020, 2025 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
-from __future__ import print_function, absolute_import
+import datetime
+import inspect
import os
import re
import warnings
+from typing import Callable, Tuple
+
+import fsspec
import oci
-import datetime
import pandas as pd
from fsspec.utils import infer_storage_options
-import inspect
-import fsspec
+from ocifs import OCIFileSystem
from ads.common import utils
+from ads.common.decorator.deprecate import deprecated
+from ads.common.decorator.runtime_dependency import (
+ OptionalDependency,
+ runtime_dependency,
+)
from ads.common.utils import is_same_class
from ads.dataset import logger
from ads.dataset.classification_dataset import (
BinaryClassificationDataset,
- MultiClassClassificationDataset,
BinaryTextClassificationDataset,
+ MultiClassClassificationDataset,
MultiClassTextClassificationDataset,
)
from ads.dataset.dataset import ADSDataset
from ads.dataset.forecasting_dataset import ForecastingDataset
from ads.dataset.helper import (
- get_feature_type,
- is_text_data,
- generate_sample,
DatasetDefaults,
- ElaboratedPath,
DatasetLoadException,
+ ElaboratedPath,
+ generate_sample,
+ get_feature_type,
+ is_text_data,
)
from ads.dataset.regression_dataset import RegressionDataset
from ads.type_discovery.type_discovery_driver import TypeDiscoveryDriver
from ads.type_discovery.typed_feature import (
+ CategoricalTypedFeature,
ContinuousTypedFeature,
DateTimeTypedFeature,
- CategoricalTypedFeature,
- OrdinalTypedFeature,
- GISTypedFeature,
DocumentTypedFeature,
+ GISTypedFeature,
+ OrdinalTypedFeature,
+ TypedFeature,
)
-from ads.type_discovery.typed_feature import TypedFeature
-from typing import Callable, Tuple
-from ocifs import OCIFileSystem
-from ads.common.decorator.runtime_dependency import (
- runtime_dependency,
- OptionalDependency,
-)
-from ads.common.decorator.deprecate import deprecated
default_snapshots_dir = None
default_storage_options = None
@@ -361,14 +360,11 @@ def list_snapshots(snapshot_dir=None, name="", storage_options=None, **kwargs):
# display in HTML format if sdk is run in notebook mode
if utils.is_notebook():
- from IPython.core.display import display
+ from ads.common.utils import get_display
+ display = get_display()
display(
- HTML(
- list_df.style.set_table_attributes("class=table")
- .hide()
- .to_html()
- )
+ HTML(list_df.style.set_table_attributes("class=table").hide().to_html())
)
return list_df
@@ -432,7 +428,7 @@ def _download_files(remote_files, local_path, overwrite=False):
os.makedirs(os.path.dirname(local_filepath), exist_ok=True)
with open(local_filepath, "wb") as f2:
f2.write(f1.read())
- except oci.exceptions.ServiceError as e:
+ except oci.exceptions.ServiceError:
raise FileNotFoundError(f"Unable to open file: {remote_file.path}")
return display_error, error_msg
@@ -600,7 +596,7 @@ def _get_dataset(
"It is not recommended to use an empty column as the target variable."
)
raise ValueError(
- f"We do not support using empty columns as the chosen target"
+ "We do not support using empty columns as the chosen target"
)
if is_same_class(target_type, ContinuousTypedFeature):
return RegressionDataset(
@@ -670,11 +666,7 @@ def _get_dataset(
is_same_class(target, DocumentTypedFeature)
or "text" in target_type["type"]
or "text" in target
- ):
- raise ValueError(
- f"The column {target} cannot be used as the target column."
- )
- elif (
+ ) or (
is_same_class(target_type, GISTypedFeature)
or "coord" in target_type["type"]
or "coord" in target
@@ -711,15 +703,15 @@ def read_tsv(path: str, **kwargs) -> pd.DataFrame:
def read_json(path: str, **kwargs) -> pd.DataFrame:
try:
return pd.read_json(path, **kwargs)
- except ValueError as e:
+ except ValueError:
return pd.read_json(
path, **utils.inject_and_copy_kwargs(kwargs, **{"lines": True})
)
@staticmethod
def read_libsvm(path: str, **kwargs) -> pd.DataFrame:
- from sklearn.datasets import load_svmlight_file
from joblib import Memory
+ from sklearn.datasets import load_svmlight_file
mem = Memory("./mycache")
@@ -808,7 +800,7 @@ def read_sql(cls, path: str, table: str = None, **kwargs) -> pd.DataFrame:
@staticmethod
def read_log(path, **kwargs):
- from ads.dataset.helper import parse_apache_log_str, parse_apache_log_datetime
+ from ads.dataset.helper import parse_apache_log_datetime, parse_apache_log_str
df = pd.read_csv(
path,
@@ -851,10 +843,11 @@ def read_html(path, html_table_index: int = None, **kwargs):
@staticmethod
@runtime_dependency(module="scipy", install_from=OptionalDependency.VIZ)
def read_arff(path, **kwargs):
- from scipy.io import arff
- import requests
from io import BytesIO, TextIOWrapper
+ import requests
+ from scipy.io import arff
+
data = None
if os.path.isfile(path):
data, _ = arff.loadarff(path)
@@ -881,7 +874,7 @@ def read_xml(path: str, **kwargs) -> pd.DataFrame:
-------
dataframe : pandas.DataFrame
"""
- import xml.etree.cElementTree as et
+ import xml.etree.ElementTree as et
def get_children(df, node, parent, i):
for name in node.attrib.keys():
@@ -969,18 +962,18 @@ def load_dataset(path: ElaboratedPath, reader_fn: Callable, **kwargs) -> pd.Data
dfs.append(data)
if len(dfs) == 0:
raise ValueError(
- f"We were unable to load the specified dataset. Read more here: "
- f"https://docs.cloud.oracle.com/en-us/iaas/tools/ads"
- f"-sdk/latest/user_guide/loading_data/loading_data.html#specify-data-types-in-load-dataset"
+ "We were unable to load the specified dataset. Read more here: "
+ "https://docs.cloud.oracle.com/en-us/iaas/tools/ads"
+ "-sdk/latest/user_guide/loading_data/loading_data.html#specify-data-types-in-load-dataset"
)
df = pd.concat(dfs)
if df is None:
raise ValueError(
- f"We were unable to load the specified dataset. Read more here: "
- f"https://docs.cloud.oracle.com/en-us/iaas/tools/ads"
- f"-sdk/latest/user_guide/loading_data/loading_data.html#specify-data-types-in-load-dataset"
+ "We were unable to load the specified dataset. Read more here: "
+ "https://docs.cloud.oracle.com/en-us/iaas/tools/ads"
+ "-sdk/latest/user_guide/loading_data/loading_data.html#specify-data-types-in-load-dataset"
)
if df.empty:
raise DatasetLoadException("Empty DataFrame, not producing a ADSDataset")
diff --git a/ads/dataset/helper.py b/ads/dataset/helper.py
index 777a6eb39..587c73d4a 100644
--- a/ads/dataset/helper.py
+++ b/ads/dataset/helper.py
@@ -1,55 +1,52 @@
#!/usr/bin/env python
-# -*- coding: utf-8; -*-
-# Copyright (c) 2020, 2023 Oracle and/or its affiliates.
+# Copyright (c) 2020, 2025 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
import ast
import base64
import html
+import importlib
+import inspect
import io
import math
import os
-import warnings
import re
+import warnings
from collections import defaultdict
-import inspect
-import importlib
-from typing import Callable, List, Tuple, Union
-import fsspec
# from pandas.io.common import _compression_to_extension
-
from numbers import Number
+from typing import Callable, List, Tuple, Union
from urllib.parse import urlparse
+import fsspec
import numpy as np
import pandas as pd
-
from pandas.core.dtypes.common import (
- is_numeric_dtype,
is_bool_dtype,
is_categorical_dtype,
is_datetime64_any_dtype,
is_float_dtype,
+ is_numeric_dtype,
)
+from ads.common import utils
from ads.common.decorator.runtime_dependency import (
- runtime_dependency,
OptionalDependency,
+ runtime_dependency,
)
-from ads.common import utils
from ads.dataset import logger
from ads.type_discovery.type_discovery_driver import TypeDiscoveryDriver
from ads.type_discovery.typed_feature import (
+ CategoricalTypedFeature,
ContinuousTypedFeature,
DateTimeTypedFeature,
- CategoricalTypedFeature,
+ DocumentTypedFeature,
GISTypedFeature,
+ OrdinalTypedFeature,
TypedFeature,
UnknownTypedFeature,
- OrdinalTypedFeature,
- DocumentTypedFeature,
)
@@ -451,20 +448,19 @@ def generate_sample(
sample_size = calculate_sample_size(
n, min_size_to_sample, confidence_level, confidence_interval
)
+ elif min_size_to_sample < requested_sample_size < n:
+ logger.info(
+ f"Downsampling from {n} rows, to the user specified {requested_sample_size} rows for graphing."
+ )
+ sample_size = requested_sample_size
+ elif requested_sample_size >= n:
+ logger.info(f"Using the entire dataset of {n} rows for graphing.")
+ sample_size = n
else:
- if min_size_to_sample < requested_sample_size < n:
- logger.info(
- f"Downsampling from {n} rows, to the user specified {requested_sample_size} rows for graphing."
- )
- sample_size = requested_sample_size
- elif requested_sample_size >= n:
- logger.info(f"Using the entire dataset of {n} rows for graphing.")
- sample_size = n
- else:
- sample_size = min_size_to_sample
- logger.info(
- f"Downsampling from {n} rows, to {sample_size} rows for graphing."
- )
+ sample_size = min_size_to_sample
+ logger.info(
+ f"Downsampling from {n} rows, to {sample_size} rows for graphing."
+ )
if sample_size and len(df) > sample_size:
frac = min(1.0, sample_size * 1.05 / n)
@@ -581,14 +577,10 @@ def visualize_transformation(transformer_pipeline, text=None):
def format_label(stage):
if "FunctionTransformer" in str(transformer_pipeline.steps[stage][1].__class__):
- return "< {} >".format(
- html.escape(transformer_pipeline.steps[stage][1].func.__name__)
- )
+ return f"< {html.escape(transformer_pipeline.steps[stage][1].func.__name__)} >"
else:
is_ads = "ads" in str(transformer_pipeline.steps[stage][1].__class__)
- return "< {} >".format(
- transformer_pipeline.steps[stage][1].__class__.__name__
- )
+ return f"< {transformer_pipeline.steps[stage][1].__class__.__name__} >"
edges = [x[0] for x in transformer_pipeline.steps]
for i, edge in enumerate(list(zip(edges[:-1], edges[1:]))):
@@ -600,8 +592,11 @@ def format_label(stage):
graph = graphviz.Source(dot)
- from IPython.core.display import display, SVG
+ from IPython.display import SVG
+
+ from ads.common.utils import get_display
+ display = get_display()
display(SVG(graph.pipe(format="svg")))
@@ -700,7 +695,7 @@ def _get_imblearn_sampler(X, y):
k_neighbors = min(min_sample_size - 1, 5)
if k_neighbors == 0:
logger.warning(
- f"""k_neighbors is 0 as in the target there exists a class label that appeared only once.
+ """k_neighbors is 0 as in the target there exists a class label that appeared only once.
SMOTE will fail. Default to RandomOverSampler.
"""
)
@@ -806,9 +801,10 @@ def parse_apache_log_datetime(x):
Due to problems parsing the timezone (`%z`) with `datetime.strptime`, the
timezone will be obtained using the `pytz` library.
"""
- import pytz
from datetime import datetime
+ import pytz
+
dt = datetime.strptime(x[1:-7], "%d/%b/%Y:%H:%M:%S")
dt_tz = int(x[-6:-3]) * 60 + int(x[-3:-1])
return dt.replace(tzinfo=pytz.FixedOffset(dt_tz))
@@ -876,7 +872,7 @@ def get_dataset(
logger.warning(
"It is not recommended to use an empty column as the target variable."
)
- raise ValueError(f"We do not support using empty columns as the chosen target")
+ raise ValueError("We do not support using empty columns as the chosen target")
if utils.is_same_class(target_type, ContinuousTypedFeature):
return RegressionDataset(
df=df,
@@ -945,9 +941,7 @@ def get_dataset(
utils.is_same_class(target, DocumentTypedFeature)
or "text" in target_type["type"]
or "text" in target
- ):
- raise ValueError(f"The column {target} cannot be used as the target column.")
- elif (
+ ) or (
utils.is_same_class(target_type, GISTypedFeature)
or "coord" in target_type["type"]
or "coord" in target
@@ -1174,15 +1168,15 @@ def read_tsv(path: str, **kwargs) -> pd.DataFrame:
def read_json(path: str, **kwargs) -> pd.DataFrame:
try:
return pd.read_json(path, **kwargs)
- except ValueError as e:
+ except ValueError:
return pd.read_json(
path, **utils.inject_and_copy_kwargs(kwargs, **{"lines": True})
)
@staticmethod
def read_libsvm(path: str, **kwargs) -> pd.DataFrame:
- from sklearn.datasets import load_svmlight_file
from joblib import Memory
+ from sklearn.datasets import load_svmlight_file
mem = Memory("./mycache")
@@ -1271,7 +1265,7 @@ def read_sql(cls, path: str, table: str = None, **kwargs) -> pd.DataFrame:
@staticmethod
def read_log(path, **kwargs):
- from ads.dataset.helper import parse_apache_log_str, parse_apache_log_datetime
+ from ads.dataset.helper import parse_apache_log_datetime, parse_apache_log_str
df = pd.read_csv(
path,
@@ -1314,10 +1308,11 @@ def read_html(path, html_table_index: int = None, **kwargs):
@staticmethod
@runtime_dependency(module="scipy", install_from=OptionalDependency.VIZ)
def read_arff(path, **kwargs):
- from scipy.io import arff
- import requests
from io import BytesIO, TextIOWrapper
+ import requests
+ from scipy.io import arff
+
data = None
if os.path.isfile(path):
data, _ = arff.loadarff(path)
@@ -1344,7 +1339,7 @@ def read_xml(path: str, **kwargs) -> pd.DataFrame:
-------
dataframe : pandas.DataFrame
"""
- import xml.etree.cElementTree as et
+ import xml.etree.ElementTree as et
def get_children(df, node, parent, i):
for name in node.attrib.keys():
@@ -1432,18 +1427,18 @@ def load_dataset(path: ElaboratedPath, reader_fn: Callable, **kwargs) -> pd.Data
dfs.append(data)
if len(dfs) == 0:
raise ValueError(
- f"We were unable to load the specified dataset. Read more here: "
- f"https://docs.cloud.oracle.com/en-us/iaas/tools/ads"
- f"-sdk/latest/user_guide/loading_data/loading_data.html#specify-data-types-in-load-dataset"
+ "We were unable to load the specified dataset. Read more here: "
+ "https://docs.cloud.oracle.com/en-us/iaas/tools/ads"
+ "-sdk/latest/user_guide/loading_data/loading_data.html#specify-data-types-in-load-dataset"
)
df = pd.concat(dfs)
if df is None:
raise ValueError(
- f"We were unable to load the specified dataset. Read more here: "
- f"https://docs.cloud.oracle.com/en-us/iaas/tools/ads"
- f"-sdk/latest/user_guide/loading_data/loading_data.html#specify-data-types-in-load-dataset"
+ "We were unable to load the specified dataset. Read more here: "
+ "https://docs.cloud.oracle.com/en-us/iaas/tools/ads"
+ "-sdk/latest/user_guide/loading_data/loading_data.html#specify-data-types-in-load-dataset"
)
if df.empty:
raise DatasetLoadException("Empty DataFrame, not producing a ADSDataset")
diff --git a/ads/dataset/plot.py b/ads/dataset/plot.py
index 9195ad34a..6bcf309ae 100644
--- a/ads/dataset/plot.py
+++ b/ads/dataset/plot.py
@@ -1,43 +1,39 @@
#!/usr/bin/env python
-# -*- coding: utf-8; -*-
-# Copyright (c) 2020, 2022 Oracle and/or its affiliates.
+# Copyright (c) 2020, 2025 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
-from __future__ import print_function, absolute_import
import random
from collections import defaultdict
from math import pi
-import pandas as pd
-import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
+import pandas as pd
from matplotlib import colors as mcolors
-from ads.dataset.helper import _log_yscale_not_set
from ads.common.decorator.runtime_dependency import (
- runtime_dependency,
OptionalDependency,
+ runtime_dependency,
)
-from ads.common.utils import _log_plot_high_cardinality_warning, MAX_DISPLAY_VALUES
+from ads.common.utils import MAX_DISPLAY_VALUES, _log_plot_high_cardinality_warning
+from ads.dataset import logger
+from ads.dataset.helper import _log_yscale_not_set
from ads.type_discovery.latlon_detector import LatLonDetector
from ads.type_discovery.typed_feature import (
+ CategoricalTypedFeature,
+ ConstantTypedFeature,
ContinuousTypedFeature,
+ CreditCardTypedFeature,
DateTimeTypedFeature,
- ConstantTypedFeature,
DiscreteTypedFeature,
- CreditCardTypedFeature,
- ZipcodeTypedFeature,
- OrdinalTypedFeature,
- CategoricalTypedFeature,
GISTypedFeature,
+ OrdinalTypedFeature,
+ ZipcodeTypedFeature,
)
-from ads.dataset import logger
-
class Plotting:
def __init__(self, df, feature_types, x, y=None, plot_type="infer", yscale=None):
@@ -88,7 +84,7 @@ def select_best_plot(self):
for choice in choices:
if choice[1].__name__.lower().startswith(self.plot_type.lower()):
return choice
- logger.info("invalid plot_type: {}".format(self.plot_type))
+ logger.info(f"invalid plot_type: {self.plot_type}")
raise ValueError(
"plot_type: '%s' invalid, use one of: %s"
% (self.plot_type, ", ".join([x[0].__name__ for x in choices]))
@@ -97,7 +93,6 @@ def select_best_plot(self):
return choices[0]
def show_in_notebook(self, **kwargs):
-
"""
Visualizes the dataset by plotting the distribution of a feature or relationship between two features.
@@ -345,6 +340,7 @@ def _single_column_count_plot(x, data, yscale=None):
@runtime_dependency(module="folium", install_from=OptionalDependency.VIZ)
def _folium_map(x, data):
import folium.plugins
+
df = LatLonDetector.extract_x_y(data[x])
lat_min, lat_max, long_min, long_max = (
min(df.Y),
@@ -357,8 +353,9 @@ def _folium_map(x, data):
folium.plugins.HeatMap(df[["Y", "X"]]).add_to(m)
m.fit_bounds([[lat_min, long_min], [lat_max, long_max]])
- from IPython.core.display import display
+ from ads.common.utils import get_display
+ display = get_display()
display(m)
@staticmethod
@@ -372,7 +369,7 @@ def _multiple_pdf(x, y, data):
colors = dict(mcolors.BASE_COLORS, **mcolors.CSS4_COLORS)
hues = [
colors[x]
- for x in colors.keys()
+ for x in colors
if isinstance(colors[x], str) and colors[x].startswith("#")
]
@@ -385,7 +382,6 @@ def _multiple_pdf(x, y, data):
@runtime_dependency(module="seaborn", install_from=OptionalDependency.VIZ)
def _matplot(self, plot_method, figsize=(4, 3), **kwargs):
-
plt.style.use("seaborn-white")
plt.rc("xtick", labelsize="x-small")
@@ -398,13 +394,9 @@ def _matplot(self, plot_method, figsize=(4, 3), **kwargs):
#
# generate a title for the plot
#
- text = '{}, "{}" ({})'.format(
- plot_method.__name__.upper(), self.x, self.feature_types[self.x].type
- )
+ text = f'{plot_method.__name__.upper()}, "{self.x}" ({self.feature_types[self.x].type})'
if self.y:
- text = '{} vs "{}" ({})'.format(
- text, self.y, self.feature_types[self.y].type
- )
+ text = f'{text} vs "{self.y}" ({self.feature_types[self.y].type})'
plt.title(text, y=1.08)
plt.grid(linestyle="dotted")
@@ -425,7 +417,7 @@ def _matplot(self, plot_method, figsize=(4, 3), **kwargs):
# rename the y-axis label and x-axis label when "count" is the y-axis label
if self.y == "count":
- plt.xlabel("Column: {} values ".format(self.x))
+ plt.xlabel(f"Column: {self.x} values ")
plt.ylabel("instance count")
# add y-axis label as "count" when plot type is hist
@@ -454,7 +446,6 @@ def _generic_plot(self, plot_method, **kwargs):
@runtime_dependency(module="seaborn", install_from=OptionalDependency.VIZ)
def _get_plot_method(self):
-
#
# combos contains a dictionary with the key being a composite of the x and y types, the value will
# always be a list, possibly and empty list, indicating no match for combination
diff --git a/ads/dataset/recommendation.py b/ads/dataset/recommendation.py
index 4f3b45f7d..c4c4f1752 100644
--- a/ads/dataset/recommendation.py
+++ b/ads/dataset/recommendation.py
@@ -90,8 +90,9 @@ def _show_constant_fill_widget(self, column):
text.value = self.fill_nan_dict[column].value
self.fill_nan_dict[column] = text
- from IPython.core.display import display
+ from ads.common.utils import get_display
+ display = get_display()
display(text)
if self.control_buttons is not None:
# self.control_buttons.close()
@@ -149,8 +150,9 @@ def show_in_notebook(self):
@runtime_dependency(module="IPython", install_from=OptionalDependency.NOTEBOOK)
@runtime_dependency(module="ipywidgets", install_from=OptionalDependency.NOTEBOOK)
def _display(self):
- from IPython.core.display import display
+ from ads.common.utils import get_display
+ display = get_display()
if self.recommendation_type_index != len(self.recommendation_types):
if (
self.recommendation_types[self.recommendation_type_index]
diff --git a/ads/evaluations/evaluation_plot.py b/ads/evaluations/evaluation_plot.py
index fb89edaee..49ec16e95 100644
--- a/ads/evaluations/evaluation_plot.py
+++ b/ads/evaluations/evaluation_plot.py
@@ -1,26 +1,26 @@
#!/usr/bin/env python
-# -*- coding: utf-8; -*-
-# Copyright (c) 2020, 2023 Oracle and/or its affiliates.
+# Copyright (c) 2020, 2025 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
-from __future__ import print_function, absolute_import, division
import base64
+import itertools
+import math
from io import BytesIO
+
import matplotlib as mpl
-import matplotlib.pyplot as plt
import matplotlib.lines as mlines
-from matplotlib.ticker import FormatStrFormatter
+import matplotlib.pyplot as plt
import numpy as np
-import math
+import pandas as pd
+from matplotlib.ticker import FormatStrFormatter
+
from ads.common import logger
from ads.common.decorator.runtime_dependency import (
- runtime_dependency,
OptionalDependency,
+ runtime_dependency,
)
-import itertools
-import pandas as pd
MAX_TITLE_LEN = 20
MAX_LEGEND_LEN = 10
@@ -36,7 +36,7 @@ def _fig_to_html(fig):
fig.savefig(tmpfile, format="png")
encoded = base64.b64encode(tmpfile.getvalue()).decode("utf-8")
- html = "{html_metrics}
" ) @@ -584,7 +583,7 @@ def _pretty_label(df, labels, copy=True): return html_raw -class ADSEvaluator(object): +class ADSEvaluator: """ADS Evaluator class. This class holds field and methods for creating and using ADS evaluator objects. @@ -684,7 +683,7 @@ def __init__( """ if any(isinstance(m, ADSModel) for m in models): logger.warn( - f"ADSModel is being deprecated. Users should instead use GenericModel or one of its subclasses. More information here: https://accelerated-data-science.readthedocs.io/en/latest/user_guide/model_registration/introduction.html#register" + "ADSModel is being deprecated. Users should instead use GenericModel or one of its subclasses. More information here: https://accelerated-data-science.readthedocs.io/en/latest/user_guide/model_registration/introduction.html#register" ) self.evaluations = [] if isinstance(training_data, ADSDatasetWithTarget): @@ -814,8 +813,8 @@ def add_metrics(self, funcs, names): self.evaluations[0] = pd.concat([self.evaluations[0], pd_res]) if name not in self.metrics_to_show: self.metrics_to_show.append(name) - setattr(self, "train_evaluations", self.evaluations[0]) - setattr(self, "test_evaluations", self.evaluations[1]) + self.train_evaluations = self.evaluations[0] + self.test_evaluations = self.evaluations[1] def del_metrics(self, names): """Removes the listed metrics from the evaluator object it is called on. @@ -901,8 +900,8 @@ def add_models(self, models, show_full_name=False): ) self.evaluations = [total_train_metrics, total_test_metrics] - setattr(self, "train_evaluations", self.evaluations[0]) - setattr(self, "test_evaluations", self.evaluations[1]) + self.train_evaluations = self.evaluations[0] + self.test_evaluations = self.evaluations[1] def del_models(self, names): """Removes the listed models from the evaluator object it is called on. 
@@ -1044,7 +1043,7 @@ def calculate_cost( cost_df = pd.DataFrame({"model": list_of_model, "cost": cost_per_model}) return cost_df - class EvaluationMetrics(object): + class EvaluationMetrics: """Class holding evaluation metrics. Attributes @@ -1199,8 +1198,11 @@ def _display_metrics(df, data_name, labels, precision): ------- Nothing """ - from IPython.core.display import display, HTML + from IPython.display import HTML + + from ads.common.utils import get_display + display = get_display() display( HTML( _pretty_label(df, labels) diff --git a/ads/explanations/mlx_global_explainer.py b/ads/explanations/mlx_global_explainer.py index 22f14b39c..854ced9a8 100644 --- a/ads/explanations/mlx_global_explainer.py +++ b/ads/explanations/mlx_global_explainer.py @@ -1,26 +1,26 @@ #!/usr/bin/env python -# -*- coding: utf-8; -*- -# Copyright (c) 2020, 2023 Oracle and/or its affiliates. +# Copyright (c) 2020, 2025 Oracle and/or its affiliates. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ -import numpy as np from abc import ABC, abstractmethod +import numpy as np + from ads.common import logger, utils +from ads.common.decorator.deprecate import deprecated +from ads.common.decorator.runtime_dependency import ( + OptionalDependency, + runtime_dependency, +) from ads.explanations.base_explainer import GlobalExplainer -from ads.explanations.mlx_interface import check_tabular_or_text -from ads.explanations.mlx_interface import init_lime_explainer -from ads.explanations.mlx_interface import init_permutation_importance_explainer from ads.explanations.mlx_interface import ( - init_partial_dependence_explainer, + check_tabular_or_text, init_ale_explainer, + init_lime_explainer, + init_partial_dependence_explainer, + init_permutation_importance_explainer, ) -from ads.common.decorator.runtime_dependency import ( - runtime_dependency, - OptionalDependency, -) -from ads.common.decorator.deprecate import deprecated class 
MLXGlobalExplainer(GlobalExplainer): @@ -186,9 +186,7 @@ def compute_feature_importance( ] if scoring_metric not in allowed_metrics and scoring_metric is not None: raise Exception( - "Scoring Metric not supported for this type of problem: {}, for problem type {}, the availble supported metrics are {}".format( - scoring_metric, self.mode_, allowed_metrics - ) + f"Scoring Metric not supported for this type of problem: {scoring_metric}, for problem type {self.mode_}, the availble supported metrics are {allowed_metrics}" ) if balance and sampling is None: sampling = {"technique": "random"} @@ -423,8 +421,11 @@ def show_in_notebook(self): # pragma: no cover pdp_plot = self.compute_partial_dependence([pdp_plot_feature_name]) # plot2 = pdp_plot.show_in_notebook() - from IPython.core.display import display, HTML + from IPython.display import HTML + from ads.common.utils import get_display + + display = get_display() display(HTML(plot1.data)) # display(HTML(plot1.data + plot2.data)) @@ -482,9 +483,7 @@ def configure_feature_importance(self, **kwargs): for k, _ in kwargs.items(): if k not in avail_args: raise ValueError( - "Unexpected argument for the feature importance explainer: {}".format( - k - ) + f"Unexpected argument for the feature importance explainer: {k}" ) if kwargs.get("client", None) is not None: @@ -528,9 +527,7 @@ def configure_partial_dependence(self, **kwargs): for k, _ in kwargs.items(): if k not in ["client"]: raise ValueError( - "Unexpected argument for the partial dependence explainer: {}".format( - k - ) + f"Unexpected argument for the partial dependence explainer: {k}" ) if kwargs.get("client", None) is not None: raise ValueError( @@ -563,9 +560,7 @@ def configure_accumulated_local_effects(self, **kwargs): for k, _ in kwargs.items(): if k not in ["client"]: raise ValueError( - "Unexpected argument for the accumulated local effects explainer: {}".format( - k - ) + f"Unexpected argument for the accumulated local effects explainer: {k}" ) if 
kwargs.get("client", None) is not None: raise ValueError( @@ -696,7 +691,6 @@ def _init_accumulated_local_effects(self, **kwargs): class MLXFeatureDependenceExplanation(ABC): - __name__ = "MLXFeatureDependenceExplanation" def __init__(self, fd, fd_exp): diff --git a/ads/feature_engineering/adsimage/image.py b/ads/feature_engineering/adsimage/image.py index f8d59deb8..9e6405b4a 100644 --- a/ads/feature_engineering/adsimage/image.py +++ b/ads/feature_engineering/adsimage/image.py @@ -1,7 +1,6 @@ #!/usr/bin/env python -# -*- coding: utf-8; -*- -# Copyright (c) 2022 Oracle and/or its affiliates. +# Copyright (c) 2020, 2025 Oracle and/or its affiliates. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ """ @@ -15,7 +14,7 @@ Examples -------- >>> from ads.feature_engineering import ADSImage ->>> from IPython.core.display import display +>>> from IPython.display import display >>> img = ADSImage.open("1.jpg") >>> display(img) >>> img.save("oci://