From d55065abe0fbf922425057d7206ad8318f6a24ac Mon Sep 17 00:00:00 2001 From: Trey <11891017+crazy-treyn@users.noreply.github.com> Date: Fri, 21 Feb 2025 23:16:46 -0600 Subject: [PATCH 1/4] Added ability to use any type of Fabric data store for the source of the create_shortcut_onelake function. --- src/sempy_labs/lakehouse/_shortcuts.py | 30 ++++++++++++++++++-------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/src/sempy_labs/lakehouse/_shortcuts.py b/src/sempy_labs/lakehouse/_shortcuts.py index 179a1024..eafbf194 100644 --- a/src/sempy_labs/lakehouse/_shortcuts.py +++ b/src/sempy_labs/lakehouse/_shortcuts.py @@ -1,5 +1,6 @@ import sempy.fabric as fabric from sempy_labs._helper_functions import ( + resolve_item_id, resolve_lakehouse_name, resolve_lakehouse_id, resolve_workspace_name_and_id, @@ -13,7 +14,8 @@ def create_shortcut_onelake( table_name: str, - source_lakehouse: str, + source_item: str | UUID, + source_item_type: str, source_workspace: str | UUID, destination_lakehouse: str, destination_workspace: Optional[str | UUID] = None, @@ -30,10 +32,12 @@ def create_shortcut_onelake( ---------- table_name : str The table name for which a shortcut will be created. - source_lakehouse : str - The Fabric lakehouse in which the table resides. + source_item : str | uuid.UUID + The source Fabric data store item in which the table resides. Can be either the Name or ID of the item. + source_item_type: str + The source Fabric data store item type. Options are 'Lakehouse', 'Warehouse', 'MirroredDatabase', 'SQLDatabase', and 'KQLDatabase'. source_workspace : str | uuid.UUID - The name or ID of the Fabric workspace in which the source lakehouse exists. + The name or ID of the Fabric workspace in which the source data store exists. destination_lakehouse : str The Fabric lakehouse in which the shortcut will be created. destination_workspace : str | uuid.UUID, default=None @@ -58,13 +62,21 @@ def create_shortcut_onelake( raise ValueError( f"{icons.red_dot} The 'destination_path' parameter must be either 'Files' or 'Tables'." ) + if not ( + source_item_type + in ["Lakehouse", "Warehouse", "MirroredDatabase", "SQLDatabase", "KQLDatabase"] + ): + raise ValueError( + f"{icons.red_dot} The 'source_item_type' parameter must be 'Lakehouse', 'Warehouse', 'MirroredDatabase', 'SQLDatabase', or 'KQLDatabase'" + ) (source_workspace_name, source_workspace_id) = resolve_workspace_name_and_id( source_workspace ) - source_lakehouse_id = resolve_lakehouse_id(source_lakehouse, source_workspace_id) - source_lakehouse_name = fabric.resolve_item_name( - item_id=source_lakehouse_id, type="Lakehouse", workspace=source_workspace_id + + source_item_id = resolve_item_id(source_item, source_workspace_id) + source_item_name = fabric.resolve_item_name( + item_id=source_item_id, type=source_item_type, workspace=source_workspace_id ) if destination_workspace is None: @@ -97,7 +109,7 @@ def create_shortcut_onelake( "target": { "oneLake": { "workspaceId": source_workspace_id, - "itemId": source_lakehouse_id, + "itemId": source_item_id, "path": source_full_path, } }, @@ -111,7 +123,7 @@ def create_shortcut_onelake( ) print( - f"{icons.green_dot} The shortcut '{shortcut_name}' was created in the '{destination_lakehouse_name}' lakehouse within the '{destination_workspace_name} workspace. It is based on the '{table_name}' table in the '{source_lakehouse_name}' lakehouse within the '{source_workspace_name}' workspace." 
+ f"{icons.green_dot} The shortcut '{shortcut_name}' was created in the '{destination_lakehouse_name}' lakehouse within the '{destination_workspace_name} workspace. It is based on the '{table_name}' table in the '{source_item_name}' {source_item_type} within the '{source_workspace_name}' workspace." ) From 8f7c69a10963db8ac1f81a0431a934c8c50cc423 Mon Sep 17 00:00:00 2001 From: Trey <11891017+crazy-treyn@users.noreply.github.com> Date: Sun, 23 Feb 2025 15:01:25 -0600 Subject: [PATCH 2/4] better provide backwards compatibility --- src/sempy_labs/lakehouse/_shortcuts.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/src/sempy_labs/lakehouse/_shortcuts.py b/src/sempy_labs/lakehouse/_shortcuts.py index eafbf194..54536b60 100644 --- a/src/sempy_labs/lakehouse/_shortcuts.py +++ b/src/sempy_labs/lakehouse/_shortcuts.py @@ -14,15 +14,16 @@ def create_shortcut_onelake( table_name: str, - source_item: str | UUID, - source_item_type: str, source_workspace: str | UUID, destination_lakehouse: str, destination_workspace: Optional[str | UUID] = None, shortcut_name: Optional[str] = None, + source_item: str | UUID = None, + source_item_type: str = "Lakehouse", source_path: str = "Tables", destination_path: str = "Tables", -): + **kwargs, +): """ Creates a `shortcut `_ to a delta table in OneLake. @@ -32,10 +33,6 @@ def create_shortcut_onelake( ---------- table_name : str The table name for which a shortcut will be created. - source_item : str | uuid.UUID - The source Fabric data store item in which the table resides. Can be either the Name or ID of the item. - source_item_type: str - The source Fabric data store item type. Options are 'Lakehouse', 'Warehouse', 'MirroredDatabase', 'SQLDatabase', and 'KQLDatabase'. source_workspace : str | uuid.UUID The name or ID of the Fabric workspace in which the source data store exists. destination_lakehouse : str @@ -46,12 +43,24 @@ def create_shortcut_onelake( or if no lakehouse attached, resolves to the workspace of the notebook. shortcut_name : str, default=None The name of the shortcut 'table' to be created. This defaults to the 'table_name' parameter value. + source_item : str | uuid.UUID, default=None + The source Fabric data store item in which the table resides. Can be either the Name or ID of the item. + source_item_type: str, default="Lakehouse" + The source Fabric data store item type. Options are 'Lakehouse', 'Warehouse', 'MirroredDatabase', 'SQLDatabase', and 'KQLDatabase'. source_path : str, default="Tables" A string representing the full path to the table/file in the source lakehouse, including either "Files" or "Tables". Examples: Tables/FolderName/SubFolderName; Files/FolderName/SubFolderName. destination_path: str, default="Tables" A string representing the full path where the shortcut is created, including either "Files" or "Tables". Examples: Tables/FolderName/SubFolderName; Files/FolderName/SubFolderName. """ + if source_item is None: + if "source_lakehouse" in kwargs: + source_item = kwargs.get("source_lakehouse") + else: + raise ValueError( + f"{icons.red_dot} The 'source_item' parameter must be provided." + ) + if not (source_path.startswith("Files") or source_path.startswith("Tables")): raise ValueError( f"{icons.red_dot} The 'source_path' parameter must be either 'Files' or 'Tables'." 
From e0c5b475ee5bb7c510f24d72dd8c09c40cd7ccb9 Mon Sep 17 00:00:00 2001 From: Trey <11891017+crazy-treyn@users.noreply.github.com> Date: Mon, 24 Feb 2025 13:57:04 -0600 Subject: [PATCH 3/4] optimize resolving of source and destination item names and ids --- src/sempy_labs/lakehouse/_shortcuts.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/src/sempy_labs/lakehouse/_shortcuts.py b/src/sempy_labs/lakehouse/_shortcuts.py index 54536b60..2162ce1f 100644 --- a/src/sempy_labs/lakehouse/_shortcuts.py +++ b/src/sempy_labs/lakehouse/_shortcuts.py @@ -1,6 +1,6 @@ import sempy.fabric as fabric from sempy_labs._helper_functions import ( - resolve_item_id, + resolve_item_name_and_id, resolve_lakehouse_name, resolve_lakehouse_id, resolve_workspace_name_and_id, @@ -23,7 +23,7 @@ def create_shortcut_onelake( source_path: str = "Tables", destination_path: str = "Tables", **kwargs, -): +): """ Creates a `shortcut `_ to a delta table in OneLake. @@ -83,9 +83,8 @@ def create_shortcut_onelake( source_workspace ) - source_item_id = resolve_item_id(source_item, source_workspace_id) - source_item_name = fabric.resolve_item_name( - item_id=source_item_id, type=source_item_type, workspace=source_workspace_id + (source_item_name, source_item_id) = resolve_item_name_and_id( + item=source_item, type=source_item_type, workspace=source_workspace_id ) if destination_workspace is None: @@ -98,13 +97,8 @@ def create_shortcut_onelake( ) destination_workspace_id = fabric.resolve_workspace_id(destination_workspace) - destination_lakehouse_id = resolve_lakehouse_id( - destination_lakehouse, destination_workspace - ) - destination_lakehouse_name = fabric.resolve_item_name( - item_id=destination_lakehouse_id, - type="Lakehouse", - workspace=destination_workspace_id, + (destination_lakehouse_name, destination_lakehouse_id) = resolve_item_name_and_id( + item=destination_lakehouse, type="Lakehouse", workspace=destination_workspace_id ) if shortcut_name is None: From 45d639a2d07ad75123a86ca0523a64673e103426 Mon Sep 17 00:00:00 2001 From: Trey <11891017+crazy-treyn@users.noreply.github.com> Date: Sat, 7 Jun 2025 08:44:38 -0500 Subject: [PATCH 4/4] Merge latest upstream changes from main branch --- README.md | 79 +- docs/source/conf.py | 2 +- environment.yml | 2 +- notebooks/Service Principal.ipynb | 2 +- pyproject.toml | 4 +- src/sempy_labs/__init__.py | 92 +- src/sempy_labs/_a_lib_info.py | 2 + src/sempy_labs/_ai.py | 4 +- src/sempy_labs/_capacities.py | 188 +- src/sempy_labs/_capacity_migration.py | 40 +- src/sempy_labs/_clear_cache.py | 12 + src/sempy_labs/_connections.py | 6 +- src/sempy_labs/_dashboards.py | 60 + src/sempy_labs/_data_pipelines.py | 36 +- src/sempy_labs/_dataflows.py | 4 +- src/sempy_labs/_dax.py | 30 +- src/sempy_labs/_dax_query_view.py | 57 + src/sempy_labs/_daxformatter.py | 78 + src/sempy_labs/_delta_analyzer.py | 411 ++- src/sempy_labs/_delta_analyzer_history.py | 298 ++ src/sempy_labs/_dictionary_diffs.py | 221 ++ src/sempy_labs/_environments.py | 97 +- src/sempy_labs/_eventhouses.py | 80 +- src/sempy_labs/_eventstreams.py | 50 +- src/sempy_labs/_external_data_shares.py | 14 +- src/sempy_labs/_gateways.py | 8 +- src/sempy_labs/_generate_semantic_model.py | 58 +- src/sempy_labs/_git.py | 91 +- src/sempy_labs/_graphQL.py | 29 +- src/sempy_labs/_helper_functions.py | 1137 +++++- src/sempy_labs/_icons.py | 12 +- src/sempy_labs/_job_scheduler.py | 228 +- src/sempy_labs/_kql_databases.py | 73 +- src/sempy_labs/_kql_querysets.py | 47 +- src/sempy_labs/_kusto.py | 137 + 
src/sempy_labs/_list_functions.py | 250 +- src/sempy_labs/_managed_private_endpoints.py | 11 +- src/sempy_labs/_mirrored_databases.py | 66 +- src/sempy_labs/_ml_experiments.py | 37 +- src/sempy_labs/_ml_models.py | 32 +- src/sempy_labs/_model_bpa.py | 30 +- src/sempy_labs/_model_bpa_bulk.py | 71 +- src/sempy_labs/_model_bpa_rules.py | 32 +- src/sempy_labs/_mounted_data_factories.py | 119 + src/sempy_labs/_notebooks.py | 87 +- src/sempy_labs/_one_lake_integration.py | 3 +- src/sempy_labs/_semantic_models.py | 138 +- src/sempy_labs/_sql.py | 97 +- src/sempy_labs/_sqldatabase.py | 188 + src/sempy_labs/_tags.py | 194 + src/sempy_labs/_user_delegation_key.py | 42 + src/sempy_labs/_utils.py | 42 + src/sempy_labs/_variable_libraries.py | 89 + src/sempy_labs/_vertipaq.py | 50 +- src/sempy_labs/_vpax.py | 388 ++ src/sempy_labs/_warehouses.py | 41 +- src/sempy_labs/_workloads.py | 32 +- src/sempy_labs/_workspace_identity.py | 6 + src/sempy_labs/_workspaces.py | 62 +- src/sempy_labs/admin/__init__.py | 59 +- src/sempy_labs/admin/_apps.py | 2 +- src/sempy_labs/admin/_artifacts.py | 62 + src/sempy_labs/admin/_basic_functions.py | 210 +- src/sempy_labs/admin/_capacities.py | 223 +- src/sempy_labs/admin/_dataflows.py | 45 + src/sempy_labs/admin/_items.py | 27 +- src/sempy_labs/admin/_reports.py | 76 +- src/sempy_labs/admin/_scanner.py | 6 +- src/sempy_labs/admin/_shared.py | 76 + src/sempy_labs/admin/_tags.py | 126 + src/sempy_labs/admin/_tenant.py | 494 +++ src/sempy_labs/admin/_users.py | 133 + src/sempy_labs/admin/_workspaces.py | 148 + .../directlake/_directlake_schema_compare.py | 3 +- .../directlake/_directlake_schema_sync.py | 84 +- src/sempy_labs/directlake/_dl_helper.py | 6 - .../directlake/_generate_shared_expression.py | 76 +- src/sempy_labs/directlake/_guardrails.py | 3 +- ...e_directlake_model_lakehouse_connection.py | 199 +- .../_update_directlake_partition_entity.py | 19 +- .../dotnet_lib/dotnet.runtime.config.json | 10 + src/sempy_labs/graph/_groups.py | 6 + src/sempy_labs/graph/_teams.py | 2 + src/sempy_labs/graph/_users.py | 4 + src/sempy_labs/lakehouse/__init__.py | 31 +- src/sempy_labs/lakehouse/_blobs.py | 246 ++ .../lakehouse/_get_lakehouse_columns.py | 59 +- .../lakehouse/_get_lakehouse_tables.py | 109 +- src/sempy_labs/lakehouse/_helper.py | 211 ++ src/sempy_labs/lakehouse/_lakehouse.py | 94 +- src/sempy_labs/lakehouse/_livy_sessions.py | 137 + src/sempy_labs/lakehouse/_shortcuts.py | 268 +- .../migration/_direct_lake_to_import.py | 57 +- .../_migrate_calctables_to_lakehouse.py | 19 +- .../migration/_migration_validation.py | 4 - .../migration/_refresh_calc_tables.py | 13 +- .../__init__.py | 15 + .../_discover.py | 209 ++ .../_refresh_catalog_metadata.py | 43 + src/sempy_labs/report/__init__.py | 6 + src/sempy_labs/report/_download_report.py | 17 +- src/sempy_labs/report/_export_report.py | 1 - src/sempy_labs/report/_generate_report.py | 42 +- src/sempy_labs/report/_report_bpa.py | 31 +- src/sempy_labs/report/_report_functions.py | 29 +- src/sempy_labs/report/_report_helper.py | 177 +- src/sempy_labs/report/_report_rebind.py | 14 +- src/sempy_labs/report/_reportwrapper.py | 3122 ++++++++++------- src/sempy_labs/report/_save_report.py | 147 + src/sempy_labs/tom/_model.py | 788 ++++- tests/test_shortcuts.py | 57 - tests/test_tom.py | 40 - 112 files changed, 10508 insertions(+), 3373 deletions(-) create mode 100644 src/sempy_labs/_a_lib_info.py create mode 100644 src/sempy_labs/_dashboards.py create mode 100644 src/sempy_labs/_dax_query_view.py create mode 100644 
src/sempy_labs/_daxformatter.py create mode 100644 src/sempy_labs/_delta_analyzer_history.py create mode 100644 src/sempy_labs/_dictionary_diffs.py create mode 100644 src/sempy_labs/_kusto.py create mode 100644 src/sempy_labs/_mounted_data_factories.py create mode 100644 src/sempy_labs/_sqldatabase.py create mode 100644 src/sempy_labs/_tags.py create mode 100644 src/sempy_labs/_user_delegation_key.py create mode 100644 src/sempy_labs/_utils.py create mode 100644 src/sempy_labs/_variable_libraries.py create mode 100644 src/sempy_labs/_vpax.py create mode 100644 src/sempy_labs/admin/_artifacts.py create mode 100644 src/sempy_labs/admin/_dataflows.py create mode 100644 src/sempy_labs/admin/_shared.py create mode 100644 src/sempy_labs/admin/_tags.py create mode 100644 src/sempy_labs/admin/_tenant.py create mode 100644 src/sempy_labs/admin/_users.py create mode 100644 src/sempy_labs/admin/_workspaces.py create mode 100644 src/sempy_labs/dotnet_lib/dotnet.runtime.config.json create mode 100644 src/sempy_labs/lakehouse/_blobs.py create mode 100644 src/sempy_labs/lakehouse/_helper.py create mode 100644 src/sempy_labs/lakehouse/_livy_sessions.py create mode 100644 src/sempy_labs/mirrored_azure_databricks_catalog/__init__.py create mode 100644 src/sempy_labs/mirrored_azure_databricks_catalog/_discover.py create mode 100644 src/sempy_labs/mirrored_azure_databricks_catalog/_refresh_catalog_metadata.py create mode 100644 src/sempy_labs/report/_save_report.py delete mode 100644 tests/test_shortcuts.py delete mode 100644 tests/test_tom.py diff --git a/README.md b/README.md index b7047cf1..22b5bfa6 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # Semantic Link Labs [![PyPI version](https://badge.fury.io/py/semantic-link-labs.svg)](https://badge.fury.io/py/semantic-link-labs) -[![Read The Docs](https://readthedocs.org/projects/semantic-link-labs/badge/?version=0.9.3&style=flat)](https://readthedocs.org/projects/semantic-link-labs/) +[![Read The Docs](https://readthedocs.org/projects/semantic-link-labs/badge/?version=0.10.0&style=flat)](https://readthedocs.org/projects/semantic-link-labs/) [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) [![Downloads](https://static.pepy.tech/badge/semantic-link-labs)](https://pepy.tech/project/semantic-link-labs) @@ -9,6 +9,12 @@ [Read the documentation on ReadTheDocs!](https://semantic-link-labs.readthedocs.io/en/stable/) --- +[Read the Wiki!](https://github.com/microsoft/semantic-link-labs/wiki) +--- + +[See code examples!](https://github.com/microsoft/semantic-link-labs/wiki/Code-Examples) +--- + Semantic Link Labs is a Python library designed for use in [Microsoft Fabric notebooks](https://learn.microsoft.com/fabric/data-engineering/how-to-use-notebook). This library extends the capabilities of [Semantic Link](https://learn.microsoft.com/fabric/data-science/semantic-link-overview) offering additional functionalities to seamlessly integrate and work alongside it. The goal of Semantic Link Labs is to simplify technical processes, empowering people to focus on higher level activities and allowing tasks that are better suited for machines to be efficiently handled without human intervention. If you encounter any issues, please [raise a bug](https://github.com/microsoft/semantic-link-labs/issues/new?assignees=&labels=&projects=&template=bug_report.md&title=). 
@@ -22,43 +28,46 @@ Check out the video below for an introduction to Semantic Link, Semantic Link La ## Featured Scenarios * Semantic Models * [Migrating an import/DirectQuery semantic model to Direct Lake](https://github.com/microsoft/semantic-link-labs?tab=readme-ov-file#direct-lake-migration) - * [Model Best Practice Analyzer (BPA)](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.html#sempy_labs.run_model_bpa) - * [Vertipaq Analyzer](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.html#sempy_labs.vertipaq_analyzer) + * [Model Best Practice Analyzer (BPA)](https://github.com/microsoft/semantic-link-labs/wiki/Code-Examples#model-best-practice-analyzer) + * [Vertipaq Analyzer](https://github.com/microsoft/semantic-link-labs/wiki/Code-Examples#vertipaq-analyzer) + * [Create a .vpax file](https://github.com/microsoft/semantic-link-labs/wiki/Code-Examples#create-a-vpax-file) * [Tabular Object Model](https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Tabular%20Object%20Model.ipynb) [(TOM)](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.tom.html) - * [Translate a semantic model's metadata](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.html#sempy_labs.translate_semantic_model) + * [Translate a semantic model's metadata](https://github.com/microsoft/semantic-link-labs/wiki/Code-Examples#translate-a-semantic-model) * [Check Direct Lake Guardrails](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.lakehouse.html#sempy_labs.lakehouse.get_lakehouse_tables) - * [Refresh](https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Semantic%20Model%20Refresh.ipynb), [clear cache](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.html#sempy_labs.clear_cache), [backup](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.html#sempy_labs.backup_semantic_model), [restore](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.html#sempy_labs.restore_semantic_model), [copy backup files](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.html#sempy_labs.copy_semantic_model_backup_file), [move/deploy across workspaces](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.html#sempy_labs.deploy_semantic_model) + * [Refresh](https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Semantic%20Model%20Refresh.ipynb), [clear cache](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.html#sempy_labs.clear_cache), [backup](https://github.com/microsoft/semantic-link-labs/wiki/Code-Examples#backup-a-semantic-model), [restore](https://github.com/microsoft/semantic-link-labs/wiki/Code-Examples#restore-a-semantic-model), [copy backup files](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.html#sempy_labs.copy_semantic_model_backup_file), [move/deploy across workspaces](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.html#sempy_labs.deploy_semantic_model) * [Run DAX queries which impersonate a user](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.html#sempy_labs.evaluate_dax_impersonation) * [Manage Query Scale Out](https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Query%20Scale%20Out.ipynb) - * [Auto-generate descriptions for any/all measures in bulk](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.tom.html#sempy_labs.tom.TOMWrapper.generate_measure_descriptions) - * [Warm the cache of a Direct Lake semantic model after a refresh (using columns currently in 
memory)](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.directlake.html#sempy_labs.directlake.warm_direct_lake_cache_isresident) - * [Warm the cache of a Direct Lake semantic model (via perspective)](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.directlake.html#sempy_labs.directlake.warm_direct_lake_cache_perspective) - * [Visualize a refresh](https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Semantic%20Model%20Refresh.ipynb) + * [Auto-generate descriptions for any/all measures in bulk](https://github.com/microsoft/semantic-link-labs/wiki/Code-Examples#auto-generate-measure-descriptions) + * [Warm the cache of a Direct Lake semantic model after a refresh (using columns currently in memory)](https://github.com/microsoft/semantic-link-labs/wiki/Code-Examples#warm-cache-the-cache-of-a-direct-lake-semantic-model) + * [Warm the cache of a Direct Lake semantic model (via perspective)](https://github.com/microsoft/semantic-link-labs/wiki/Code-Examples#warm-cache-the-cache-of-a-direct-lake-semantic-model) + * [Visualize a refresh](https://github.com/microsoft/semantic-link-labs/wiki/Code-Examples#refresh-a-semantic-model) * [Update the connection of a Direct Lake semantic model](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.directlake.html#sempy_labs.directlake.update_direct_lake_model_connection) * [Dynamically generate a Direct Lake semantic model](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.directlake.html#sempy_labs.directlake.generate_direct_lake_semantic_model) * [Check why a Direct Lake semantic model would fallback to DirectQuery](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.directlake.html#sempy_labs.directlake.check_fallback_reason) * [View a measure dependency tree](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.html#sempy_labs.measure_dependency_tree) * [View unique columns touched in a single (or multiple) DAX query(ies)](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.html#sempy_labs.get_dax_query_dependencies) * [Analyze delta tables for Direct Lake semantic models using Delta Analyzer](https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Delta%20Analyzer.ipynb) - * [View synonyms from the linguistic schema](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.html#sempy_labs.list_synonyms) + * [View synonyms from the linguistic schema](https://github.com/microsoft/semantic-link-labs/wiki/Code-Examples#list-the-synonyms-in-the-linguistic-metadata) * [Add](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.tom.html#sempy_labs.tom.TOMWrapper.add_incremental_refresh_policy), [update](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.tom.html#sempy_labs.tom.TOMWrapper.update_incremental_refresh_policy) and [view](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.tom.html#sempy_labs.tom.TOMWrapper.show_incremental_refresh_policy) an incremental refresh policy. 
* Reports - * [Report Best Practice Analyzer (BPA)](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.report.html#sempy_labs.report.run_report_bpa) - * [View report metadata](https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Report%20Analysis.ipynb) - * [View semantic model objects most frequently used in Power BI reports](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.html#sempy_labs.list_semantic_model_object_report_usage) - * [View broken reports](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.html#sempy_labs.list_report_semantic_model_objects) - * [Set a report theme](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.report.html#sempy_labs.report.ReportWrapper.set_theme) - * [Migrate report-level measures to the semantic model](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.report.html#sempy_labs.report.ReportWrapper.migrate_report_level_measures) - * [Rebind reports](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.report.html#sempy_labs.report.report_rebind) + * [Report Best Practice Analyzer (BPA)](https://github.com/microsoft/semantic-link-labs/wiki/Code-Examples#report-best-practice-analyzer) + * [View report metadata](https://github.com/microsoft/semantic-link-labs/wiki/Code-Examples#view-report-metadata) + * [View semantic model objects most frequently used in Power BI reports](https://github.com/microsoft/semantic-link-labs/wiki/Code-Examples#show-the-frequency-of-semantic-model-object-used-within-reports) + * [View broken reports](https://github.com/microsoft/semantic-link-labs/wiki/Code-Examples#find-broken-visuals-in-a-power-bi-report) + * [Set a report theme](https://github.com/microsoft/semantic-link-labs/wiki/Code-Examples#set-the-theme-of-a-report) + * [Migrate report-level measures to the semantic model](https://github.com/microsoft/semantic-link-labs/wiki/Code-Examples#migrate-report-level-measures-to-the-semantic-model) + * [Rebind reports](https://github.com/microsoft/semantic-link-labs/wiki/Code-Examples#rebind-a-report-to-a-different-semantic-model) + * [Save a report as a .pbip](https://github.com/microsoft/semantic-link-labs/wiki/Code-Examples#save-a-report-as-a-pbip-file) * Capacities * [Migrating a Power BI Premium capacity (P sku) to a Fabric capacity (F sku)](https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Capacity%20Migration.ipynb) * [Migrating a Fabric Trial capacity (FT sku) to a Fabric capacity (F sku)](https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Capacity%20Migration.ipynb) * [Create](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.html#sempy_labs.create_fabric_capacity)/[update](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.html#sempy_labs.update_fabric_capacity)/[suspend](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.html#sempy_labs.suspend_fabric_capacity)/[resume](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.html#sempy_labs.resume_fabric_capacity) Fabric capacities * Lakehouses - * [Optimize lakehouse tables](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.lakehouse.html#sempy_labs.lakehouse.optimize_lakehouse_tables) - * [Vacuum lakehouse tables](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.lakehouse.html#sempy_labs.lakehouse.vacuum_lakehouse_tables) - * [Create](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.lakehouse.html#sempy_labs.lakehouse.create_shortcut_onelake), 
[delete](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.lakehouse.html#sempy_labs.lakehouse.delete_shortcut), and [view shortcuts](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.html#sempy_labs.list_shortcuts) + * [Optimize lakehouse tables](https://github.com/microsoft/semantic-link-labs/wiki/Code-Examples#optimize-lakehouse-tables) + * [Vacuum lakehouse tables](https://github.com/microsoft/semantic-link-labs/wiki/Code-Examples#vacuum-lakehouse-tables) + * [Create](https://github.com/microsoft/semantic-link-labs/wiki/Code-Examples#create-a-onelake-shortcut), [delete](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.lakehouse.html#sempy_labs.lakehouse.delete_shortcut), and [view shortcuts](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.html#sempy_labs.list_shortcuts) * [Analyze delta tables for Direct Lake semantic models using Delta Analyzer](https://github.com/microsoft/semantic-link-labs/blob/main/notebooks/Delta%20Analyzer.ipynb) + * [Recover a soft-deleted lakehouse table/file/folder](https://github.com/microsoft/semantic-link-labs/wiki/Code-Examples#recover-a-lakehouse-object) * Notebooks * [Import a notebook from the web](https://semantic-link-labs.readthedocs.io/en/stable/sempy_labs.html#sempy_labs.import_notebook_from_web) * APIs @@ -120,6 +129,15 @@ An even better way to ensure the semantic-link-labs library is available in your 2. Select your newly created environment within the 'Environment' drop down in the navigation bar at the top of the notebook ## Version History +* [0.10.0](https://github.com/microsoft/semantic-link-labs/releases/tag/0.10.0) (May 30, 2025) +* [0.9.11](https://github.com/microsoft/semantic-link-labs/releases/tag/0.9.11) (May 22, 2025) +* [0.9.10](https://github.com/microsoft/semantic-link-labs/releases/tag/0.9.10) (April 24, 2025) +* [0.9.9](https://github.com/microsoft/semantic-link-labs/releases/tag/0.9.9) (April 7, 2025) +* [0.9.8](https://github.com/microsoft/semantic-link-labs/releases/tag/0.9.8) (April 3, 2025) +* [0.9.7](https://github.com/microsoft/semantic-link-labs/releases/tag/0.9.7) (April 1, 2025) +* [0.9.6](https://github.com/microsoft/semantic-link-labs/releases/tag/0.9.6) (March 12, 2025) +* [0.9.5](https://github.com/microsoft/semantic-link-labs/releases/tag/0.9.5) (March 7, 2025) +* [0.9.4](https://github.com/microsoft/semantic-link-labs/releases/tag/0.9.4) (February 27, 2025) * [0.9.3](https://github.com/microsoft/semantic-link-labs/releases/tag/0.9.3) (February 13, 2025) * [0.9.2](https://github.com/microsoft/semantic-link-labs/releases/tag/0.9.2) (February 5, 2025) * [0.9.1](https://github.com/microsoft/semantic-link-labs/releases/tag/0.9.1) (January 22, 2025) @@ -260,10 +278,11 @@ python -m build #### Running and testing the .whl file 1. Open a notebook in a Fabric workspace. -2. Navigate to 'Resources' within the Explorer tab. -3. Upload the .whl file here. -4. Drag the .whl file into a notebook cell. -5. Run the %pip install command generated by step 4. +2. Navigate to 'Resources' within the Explorer tab on the left pane. Do not use the 'Files' section. +3. Upload the .whl file to the 'Resource' section. +4. Click on the '...' next to the .whl file and click 'Copy relative path'. +5. Enter '%pip install ""' into a notebook cell. Within the double quotes, paste the copied path from step 4. +6. Run the notebook cell. #### Submitting a Pull Request (PR) 1. Within the 'Source Control' tab, commit your changes to the branch. @@ -272,6 +291,18 @@ python -m build 4. 
Enter details into the description. 5. Click 'Create'. +#### Code Formatting +We use [black](github.com/psf/black) formatting as a code formatting standard. Make sure to run 'black' formatting on your code before submitting a pull request. + +Run this code to install black +```cli +pip install black==25.1.0 +``` + +Run this code to format your code using black +```cli +python -m black src +``` ## Trademarks diff --git a/docs/source/conf.py b/docs/source/conf.py index 8da1253c..05dbb6eb 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -13,7 +13,7 @@ project = 'semantic-link-labs' copyright = '2024, Microsoft and community' author = 'Microsoft and community' -release = '0.9.3' +release = '0.10.0' # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration diff --git a/environment.yml b/environment.yml index 04413524..12e4024d 100644 --- a/environment.yml +++ b/environment.yml @@ -6,7 +6,7 @@ dependencies: - pytest-cov - pytest-mock - pip: - - semantic-link-sempy>=0.8.5 + - semantic-link-sempy>=0.10.2 - azure-identity==1.7.1 - azure-storage-blob>=12.9.0 - pandas-stubs diff --git a/notebooks/Service Principal.ipynb b/notebooks/Service Principal.ipynb index 601ec3f4..a60f3062 100644 --- a/notebooks/Service Principal.ipynb +++ b/notebooks/Service Principal.ipynb @@ -144,7 +144,7 @@ " key_vault_client_secret=key_vault_client_secret):\n", "\n", " labs.suspend_fabric_capacity(\n", - " capacity_name='',\n", + " capacity='',\n", " azure_subscription_id='',\n", " resource_group='',\n", " )" diff --git a/pyproject.toml b/pyproject.toml index 6a709c45..5473ace0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ name="semantic-link-labs" authors = [ { name = "Microsoft Corporation" }, ] -version="0.9.3" +version="0.10.0" description="Semantic Link Labs for Microsoft Fabric" readme="README.md" requires-python=">=3.10,<3.12" @@ -23,7 +23,7 @@ classifiers = [ license= { text = "MIT License" } dependencies = [ - "semantic-link-sempy>=0.8.5", + "semantic-link-sempy>=0.10.2", "anytree", "powerbiclient", "polib", diff --git a/src/sempy_labs/__init__.py b/src/sempy_labs/__init__.py index e330b584..8ed8609d 100644 --- a/src/sempy_labs/__init__.py +++ b/src/sempy_labs/__init__.py @@ -1,6 +1,35 @@ +from sempy_labs._variable_libraries import ( + list_variable_libraries, + delete_variable_library, +) +from sempy_labs._kusto import ( + query_kusto, + query_workspace_monitoring, +) +from sempy_labs._vpax import ( + create_vpax, +) +from sempy_labs._delta_analyzer_history import ( + delta_analyzer_history, +) +from sempy_labs._dax_query_view import ( + generate_dax_query_view_url, +) +from sempy_labs._mounted_data_factories import ( + list_mounted_data_factories, + get_mounted_data_factory_definition, + delete_mounted_data_factory, +) +from sempy_labs._tags import ( + list_tags, + apply_tags, + unapply_tags, +) from sempy_labs._semantic_models import ( get_semantic_model_refresh_schedule, enable_semantic_model_scheduled_refresh, + delete_semantic_model, + update_semantic_model_refresh_schedule, ) from sempy_labs._graphQL import ( list_graphql_apis, @@ -9,8 +38,14 @@ from sempy_labs._job_scheduler import ( list_item_job_instances, list_item_schedules, + create_item_schedule_cron, + create_item_schedule_daily, + create_item_schedule_weekly, +) +from sempy_labs._delta_analyzer import ( + delta_analyzer, + get_delta_table_history, ) -from sempy_labs._delta_analyzer import delta_analyzer from 
sempy_labs._gateways import ( list_gateway_members, list_gateway_role_assigments, @@ -93,7 +128,7 @@ ) from sempy_labs._kql_databases import ( list_kql_databases, - create_kql_database, + # create_kql_database, delete_kql_database, ) from sempy_labs._mirrored_warehouses import list_mirrored_warehouses @@ -101,6 +136,7 @@ create_environment, delete_environment, publish_environment, + list_environments, ) from sempy_labs._clear_cache import ( clear_cache, @@ -136,6 +172,7 @@ create_or_update_resource_group, list_resource_groups, get_resource_group, + list_capacities, ) from sempy_labs._spark import ( get_spark_settings, @@ -153,6 +190,7 @@ assign_workspace_to_capacity, unassign_workspace_from_capacity, list_workspace_role_assignments, + delete_workspace, ) from sempy_labs._notebooks import ( get_notebook_definition, @@ -163,6 +201,14 @@ from sempy_labs._sql import ( ConnectWarehouse, ConnectLakehouse, + ConnectSQLDatabase, +) +from sempy_labs._sqldatabase import ( + get_sql_database_columns, + get_sql_database_tables, + create_sql_database, + delete_sql_database, + list_sql_databases, ) from sempy_labs._workspace_identity import ( provision_workspace_identity, @@ -182,6 +228,8 @@ connect_workspace_to_azure_dev_ops, connect_workspace_to_github, disconnect_workspace_from_git, + get_my_git_credentials, + update_my_git_credentials, ) from sempy_labs._dataflows import ( list_dataflow_storage_accounts, @@ -215,6 +263,9 @@ update_semantic_model_from_bim, get_semantic_model_definition, ) +from sempy_labs._dashboards import ( + list_dashboards, +) from sempy_labs._list_functions import ( list_reports_using_semantic_model, list_semantic_model_object_report_usage, @@ -222,8 +273,6 @@ list_semantic_model_objects, list_shortcuts, get_object_level_security, - list_capacities, - list_dashboards, list_datamarts, list_lakehouses, list_sql_endpoints, @@ -291,11 +340,15 @@ vertipaq_analyzer, import_vertipaq_analyzer, ) +from sempy_labs._user_delegation_key import ( + get_user_delegation_key, +) __all__ = [ "resolve_warehouse_id", "ConnectWarehouse", "ConnectLakehouse", + "ConnectSQLDatabase", "update_semantic_model_from_bim", "list_connections", "get_semantic_model_size", @@ -413,7 +466,7 @@ "convert_to_friendly_case", "list_mirrored_warehouses", "list_kql_databases", - "create_kql_database", + # "create_kql_database", "delete_kql_database", "create_eventhouse", "list_eventhouses", @@ -507,4 +560,33 @@ "get_semantic_model_refresh_schedule", "get_eventhouse_definition", "enable_semantic_model_scheduled_refresh", + "get_delta_table_history", + "get_sql_database_columns", + "get_sql_database_tables", + "create_item_schedule_cron", + "create_item_schedule_daily", + "create_item_schedule_weekly", + "get_my_git_credentials", + "update_my_git_credentials", + "list_mounted_data_factories", + "get_mounted_data_factory_definition", + "delete_mounted_data_factory", + "generate_dax_query_view_url", + "delete_semantic_model", + "delete_workspace", + "create_sql_database", + "delete_sql_database", + "list_sql_databases", + "delta_analyzer_history", + "query_kusto", + "query_workspace_monitoring", + "list_environments", + "list_tags", + "list_variable_libraries", + "delete_variable_library", + "create_vpax", + "update_semantic_model_refresh_schedule", + "apply_tags", + "unapply_tags", + "get_user_delegation_key", ] diff --git a/src/sempy_labs/_a_lib_info.py b/src/sempy_labs/_a_lib_info.py new file mode 100644 index 00000000..fa038dcd --- /dev/null +++ b/src/sempy_labs/_a_lib_info.py @@ -0,0 +1,2 @@ +lib_name = 
"semanticlinklabs" +lib_version = "0.10.0" diff --git a/src/sempy_labs/_ai.py b/src/sempy_labs/_ai.py index b569c1c3..1924444e 100644 --- a/src/sempy_labs/_ai.py +++ b/src/sempy_labs/_ai.py @@ -216,7 +216,9 @@ def generate_aggs( f"{icons.green_dot} The '{aggLakeTName}' table has been created/updated in the lakehouse." # Create/update semantic model agg table - tom_server = fabric.create_tom_server(readonly=False, workspace=workspace) + tom_server = fabric.create_tom_server( + dataset=dataset, readonly=False, workspace=workspace + ) m = tom_server.Databases.GetByName(dataset).Model print(f"\n{icons.in_progress} Updating the '{dataset}' semantic model...") dfC_agg = dfC[dfC["Table Name"] == aggTableName] diff --git a/src/sempy_labs/_capacities.py b/src/sempy_labs/_capacities.py index 613190d4..21ac15a8 100644 --- a/src/sempy_labs/_capacities.py +++ b/src/sempy_labs/_capacities.py @@ -1,4 +1,3 @@ -import sempy.fabric as fabric from typing import Optional, List, Tuple from sempy._utils._log import log import sempy_labs._icons as icons @@ -242,7 +241,7 @@ def list_vcores() -> pd.DataFrame: def get_capacity_resource_governance(capacity_name: str): - dfC = fabric.list_capacities() + dfC = list_capacities() dfC_filt = dfC[dfC["Display Name"] == capacity_name] capacity_id = dfC_filt["Id"].iloc[0].upper() @@ -256,7 +255,6 @@ def suspend_fabric_capacity( capacity_name: str, azure_subscription_id: str, resource_group: str, - **kwargs, ): """ This function suspends a Fabric capacity. @@ -275,26 +273,9 @@ def suspend_fabric_capacity( The name of the Azure resource group. """ - token_provider = auth.token_provider.get() - if token_provider is None: - token_provider = ServicePrincipalTokenProvider.from_azure_key_vault( - key_vault_uri=kwargs["key_vault_uri"], - key_vault_tenant_id=kwargs["key_vault_tenant_id"], - key_vault_client_id=kwargs["key_vault_client_id"], - key_vault_client_secret=kwargs["key_vault_client_secret"], - ) - print( - f"{icons.info} Please use the 'token_provider' parameter instead of the key vault parameters within this function as the key vault parameters have been deprecated." - ) - - headers = _get_headers(token_provider, audience="azure") - url = f"https://management.azure.com/subscriptions/{azure_subscription_id}/resourceGroups/{resource_group}/providers/Microsoft.Fabric/capacities/{capacity_name}/suspend?api-version={icons.azure_api_version}" - response = requests.post(url, headers=headers) - - if response.status_code != 202: - raise FabricHTTPException(response) + _base_api(request=url, client="azure", method="post", status_codes=202) print(f"{icons.green_dot} The '{capacity_name}' capacity has been suspended.") @@ -304,7 +285,6 @@ def resume_fabric_capacity( capacity_name: str, azure_subscription_id: str, resource_group: str, - **kwargs, ): """ This function resumes a Fabric capacity. @@ -323,26 +303,9 @@ def resume_fabric_capacity( The name of the Azure resource group. """ - token_provider = auth.token_provider.get() - if token_provider is None: - token_provider = ServicePrincipalTokenProvider.from_azure_key_vault( - key_vault_uri=kwargs["key_vault_uri"], - key_vault_tenant_id=kwargs["key_vault_tenant_id"], - key_vault_client_id=kwargs["key_vault_client_id"], - key_vault_client_secret=kwargs["key_vault_client_secret"], - ) - print( - f"{icons.info} Please use the 'token_provider' parameter instead of the key vault parameters within this function as the key vault parameters have been deprecated." 
- ) - - headers = _get_headers(token_provider, audience="azure") - url = f"https://management.azure.com/subscriptions/{azure_subscription_id}/resourceGroups/{resource_group}/providers/Microsoft.Fabric/capacities/{capacity_name}/resume?api-version={icons.azure_api_version}" - response = requests.post(url, headers=headers) - - if response.status_code != 202: - raise FabricHTTPException(response) + _base_api(request=url, client="azure", method="post", status_codes=202) print(f"{icons.green_dot} The '{capacity_name}' capacity has been resumed.") @@ -352,7 +315,6 @@ def delete_embedded_capacity( capacity_name: str, azure_subscription_id: str, resource_group: str, - **kwargs, ): """ This function deletes a Power BI Embedded capacity. @@ -369,53 +331,36 @@ def delete_embedded_capacity( The name of the Azure resource group. """ - token_provider = auth.token_provider.get() - if token_provider is None: - token_provider = ServicePrincipalTokenProvider.from_azure_key_vault( - key_vault_uri=kwargs["key_vault_uri"], - key_vault_tenant_id=kwargs["key_vault_tenant_id"], - key_vault_client_id=kwargs["key_vault_client_id"], - key_vault_client_secret=kwargs["key_vault_client_secret"], - ) - print( - f"{icons.info} Please use the 'token_provider' parameter instead of the key vault parameters within this function as the key vault parameters have been deprecated." - ) - - headers = _get_headers(token_provider, audience="azure") - url = f"https://management.azure.com/subscriptions/{azure_subscription_id}/resourceGroups/{resource_group}/providers/Microsoft.PowerBIDedicated/capacities/{capacity_name}?api-version={icons.azure_api_version}" - response = requests.delete(url, headers=headers) - - if response.status_code not in [200, 202]: - raise FabricHTTPException(response) + _base_api(request=url, client="azure", method="delete", status_codes=[200, 202]) print(f"{icons.green_dot} The '{capacity_name}' capacity has been deleted.") @log -def delete_premium_capacity(capacity_name: str): +def delete_premium_capacity(capacity: str | UUID, **kwargs): """ This function deletes a Power BI Premium capacity. Parameters ---------- - capacity_name : str - Name of the Fabric capacity. + capacity : str | uuid.UUID + Name or ID of the Fabric capacity. """ + from sempy_labs._helper_functions import resolve_capacity_id - dfC = fabric.list_capacities() - - dfC_filt = dfC[dfC["Display Name"] == capacity_name] - if len(dfC_filt) == 0: - raise ValueError( - f"{icons.red_dot} The '{capacity_name}' capacity does not exist." + if "capacity_name" in kwargs: + capacity = kwargs["capacity_name"] + print( + f"{icons.warning} The 'capacity_name' parameter is deprecated. Please use 'capacity' instead." ) - capacity_id = dfC_filt["Id"].iloc[0].upper() + + capacity_id = resolve_capacity_id(capacity=capacity).upper() _base_api(request=f"capacities/{capacity_id}", method="delete", status_codes=204) - print(f"{icons.green_dot} The '{capacity_name}' capacity has been deleted.") + print(f"{icons.green_dot} The '{capacity}' capacity has been deleted.") @log @@ -423,7 +368,6 @@ def delete_fabric_capacity( capacity_name: str, azure_subscription_id: str, resource_group: str, - **kwargs, ): """ This function deletes a Fabric capacity. @@ -442,26 +386,9 @@ def delete_fabric_capacity( The name of the Azure resource group. 
""" - token_provider = auth.token_provider.get() - if token_provider is None: - token_provider = ServicePrincipalTokenProvider.from_azure_key_vault( - key_vault_uri=kwargs["key_vault_uri"], - key_vault_tenant_id=kwargs["key_vault_tenant_id"], - key_vault_client_id=kwargs["key_vault_client_id"], - key_vault_client_secret=kwargs["key_vault_client_secret"], - ) - print( - f"{icons.info} Please use the 'token_provider' parameter instead of the key vault parameters within this function as the key vault parameters have been deprecated." - ) - - headers = _get_headers(token_provider, audience="azure") - url = f"https://management.azure.com/subscriptions/{azure_subscription_id}/resourceGroups/{resource_group}/providers/Microsoft.Fabric/capacities/{capacity_name}?api-version={icons.azure_api_version}" - response = requests.delete(url, headers=headers) - - if response.status_code != 202: - raise FabricHTTPException(response) + _base_api(request=url, client="azure", method="delete", status_codes=202) print(f"{icons.green_dot} The '{capacity_name}' capacity has been deleted.") @@ -474,7 +401,6 @@ def update_fabric_capacity( sku: Optional[str] = None, admin_members: Optional[str | List[str]] = None, tags: Optional[dict] = None, - **kwargs, ): """ This function updates a Fabric capacity's properties. @@ -499,25 +425,9 @@ def update_fabric_capacity( Tag(s) to add to the capacity. Example: {'tagName': 'tagValue'}. """ - token_provider = auth.token_provider.get() - if token_provider is None: - token_provider = ServicePrincipalTokenProvider.from_azure_key_vault( - key_vault_uri=kwargs["key_vault_uri"], - key_vault_tenant_id=kwargs["key_vault_tenant_id"], - key_vault_client_id=kwargs["key_vault_client_id"], - key_vault_client_secret=kwargs["key_vault_client_secret"], - ) - print( - f"{icons.info} Please use the 'token_provider' parameter instead of the key vault parameters within this function as the key vault parameters have been deprecated." - ) - - headers = _get_headers(token_provider, audience="azure") - url = f"https://management.azure.com/subscriptions/{azure_subscription_id}/resourceGroups/{resource_group}/providers/Microsoft.Fabric/capacities/{capacity_name}?api-version={icons.azure_api_version}" - get_response = requests.get(url, headers=headers) - if get_response.status_code != 200: - raise FabricHTTPException(get_response) + get_response = _base_api(request=url, client="azure") get_json = get_response.json() current_sku = get_json.get("sku", {}).get("name") @@ -549,10 +459,9 @@ def update_fabric_capacity( return payload = _add_sll_tag(payload, tags) - response = requests.patch(url, headers=headers, json=payload) - - if response.status_code != 202: - raise FabricHTTPException(response) + _base_api( + request=url, client="azure", method="patch", payload=payload, status_codes=202 + ) print( f"{icons.green_dot} The '{capacity_name}' capacity has been updated accordingly." @@ -588,28 +497,13 @@ def check_fabric_capacity_name_availablility( An indication as to whether the Fabric capacity name is available or not. 
""" - token_provider = auth.token_provider.get() - if token_provider is None: - token_provider = ServicePrincipalTokenProvider.from_azure_key_vault( - key_vault_uri=kwargs["key_vault_uri"], - key_vault_tenant_id=kwargs["key_vault_tenant_id"], - key_vault_client_id=kwargs["key_vault_client_id"], - key_vault_client_secret=kwargs["key_vault_client_secret"], - ) - print( - f"{icons.info} Please use the 'token_provider' parameter instead of the key vault parameters within this function as the key vault parameters have been deprecated." - ) - - headers = _get_headers(token_provider, audience="azure") - payload = {"name": capacity_name, "type": "Microsoft.Fabric/capacities"} url = f"https://management.azure.com/subscriptions/{azure_subscription_id}/providers/Microsoft.Fabric/locations/{region}/checkNameAvailability?api-version={icons.azure_api_version}" - response = requests.post(url, headers=headers, json=payload) - - if response.status_code != 202: - raise FabricHTTPException(response) + response = _base_api( + request=url, client="azure", method="post", payload=payload, status_codes=202 + ) return bool(response.json().get("nameAvailable")) @@ -1236,3 +1130,39 @@ def get_resource_group(azure_subscription_id: str, resource_group: str) -> pd.Da } return pd.DataFrame(new_data, index=[0]) + + +def list_capacities() -> pd.DataFrame: + """ + Shows the capacities and their properties. + + Returns + ------- + pandas.DataFrame + A pandas dataframe showing the capacities and their properties + """ + + columns = { + "Id": "string", + "Display Name": "string", + "Sku": "string", + "Region": "string", + "State": "string", + "Admins": "string", + } + df = _create_dataframe(columns=columns) + + response = _base_api(request="/v1.0/myorg/capacities", client="fabric_sp") + + for i in response.json().get("value", []): + new_data = { + "Id": i.get("id").lower(), + "Display Name": i.get("displayName"), + "Sku": i.get("sku"), + "Region": i.get("region"), + "State": i.get("state"), + "Admins": [i.get("admins", [])], + } + df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + + return df diff --git a/src/sempy_labs/_capacity_migration.py b/src/sempy_labs/_capacity_migration.py index 20858445..82fb8545 100644 --- a/src/sempy_labs/_capacity_migration.py +++ b/src/sempy_labs/_capacity_migration.py @@ -16,6 +16,7 @@ _base_api, ) from sempy_labs._capacities import create_fabric_capacity +from uuid import UUID def _migrate_settings(source_capacity: str, target_capacity: str): @@ -105,17 +106,13 @@ def migrate_workspaces( migrated_workspaces = [] for i, r in dfW.iterrows(): - workspace = r["Name"] - - if workspaces is None or workspace in workspaces: - pass - else: - continue - - if assign_workspace_to_capacity( - capacity_name=target_capacity, workspace=workspace - ): - migrated_workspaces.append(workspace) + workspace_id = r["Id"] + workspace_name = r["Name"] + if workspaces is None or workspace_name in workspaces: + assign_workspace_to_capacity( + capacity=target_capacity, workspace=workspace_id + ) + migrated_workspaces.append(workspace_name) if len(migrated_workspaces) < workspace_count: print( @@ -123,10 +120,11 @@ def migrate_workspaces( ) print(f"{icons.in_progress} Initiating rollback...") for i, r in dfW.iterrows(): - workspace = r["Name"] - if workspace in migrated_workspaces: + workspace_id = r["Id"] + workspace_name = r["Name"] + if workspace_name in migrated_workspaces: assign_workspace_to_capacity( - capacity_name=source_capacity, workspace=workspace + capacity=source_capacity, 
workspace=workspace_id ) print( f"{icons.green_dot} Rollback of the workspaces to the '{source_capacity}' capacity is complete." @@ -531,7 +529,7 @@ def _migrate_delegated_tenant_settings(source_capacity: str, target_capacity: st @log -def _migrate_spark_settings(source_capacity: str, target_capacity: str): +def _migrate_spark_settings(source_capacity: str | UUID, target_capacity: str | UUID): """ This function migrates a capacity's spark settings to another capacity. @@ -539,14 +537,14 @@ def _migrate_spark_settings(source_capacity: str, target_capacity: str): Parameters ---------- - source_capacity : str - Name of the source capacity. - target_capacity : str - Name of the target capacity. + source_capacity : str | uuid.UUID + Name or ID of the source capacity. + target_capacity : str | uuid.UUID + Name or ID of the target capacity. """ - source_capacity_id = resolve_capacity_id(capacity_name=source_capacity) - target_capacity_id = resolve_capacity_id(capacity_name=target_capacity) + source_capacity_id = resolve_capacity_id(capacity=source_capacity) + target_capacity_id = resolve_capacity_id(capacity=target_capacity) # Get source capacity server dns response = _base_api(request=f"metadata/capacityInformation/{source_capacity_id}") diff --git a/src/sempy_labs/_clear_cache.py b/src/sempy_labs/_clear_cache.py index ef71fbf7..2c3608d2 100644 --- a/src/sempy_labs/_clear_cache.py +++ b/src/sempy_labs/_clear_cache.py @@ -59,6 +59,7 @@ def backup_semantic_model( allow_overwrite: bool = True, apply_compression: bool = True, workspace: Optional[str | UUID] = None, + password: Optional[str] = None, ): """ `Backs up `_ a semantic model to the ADLS Gen2 storage account connected to the workspace. @@ -72,6 +73,8 @@ def backup_semantic_model( Must end in '.abf'. Example 1: file_path = 'MyModel.abf' Example 2: file_path = 'MyFolder/MyModel.abf' + password : Optional[str], default=None + Password to encrypt the backup file. If None, no password is used. allow_overwrite : bool, default=True If True, overwrites backup files of the same name. If False, the file you are saving cannot have the same name as a file that already exists in the same location. apply_compression : bool, default=True @@ -99,6 +102,9 @@ def backup_semantic_model( } } + if password: + tmsl["backup"]["password"] = password # Add password only if provided + fabric.execute_tmsl(script=tmsl, workspace=workspace_id) print( f"{icons.green_dot} The '{dataset_name}' semantic model within the '{workspace_name}' workspace has been backed up to the '{file_path}' location." @@ -113,6 +119,7 @@ def restore_semantic_model( ignore_incompatibilities: bool = True, force_restore: bool = False, workspace: Optional[str | UUID] = None, + password: Optional[str] = None, ): """ `Restores `_ a semantic model based on a backup (.abf) file @@ -126,6 +133,8 @@ def restore_semantic_model( The location in which to backup the semantic model. Must end in '.abf'. Example 1: file_path = 'MyModel.abf' Example 2: file_path = 'MyFolder/MyModel.abf' + password : Optional[str], default=None + Password to decrypt the backup file. If None, no password is used. allow_overwrite : bool, default=True If True, overwrites backup files of the same name. If False, the file you are saving cannot have the same name as a file that already exists in the same location. 
ignore_incompatibilities : bool, default=True @@ -155,6 +164,9 @@ def restore_semantic_model( } } + if password: + tmsl["restore"]["password"] = password + if force_restore: tmsl["restore"]["forceRestore"] = force_restore diff --git a/src/sempy_labs/_connections.py b/src/sempy_labs/_connections.py index 78257aef..75584740 100644 --- a/src/sempy_labs/_connections.py +++ b/src/sempy_labs/_connections.py @@ -1,4 +1,3 @@ -import sempy.fabric as fabric import pandas as pd from typing import Optional from sempy_labs._helper_functions import ( @@ -7,6 +6,7 @@ _update_dataframe_datatypes, _base_api, _create_dataframe, + resolve_item_id, ) from uuid import UUID import sempy_labs._icons as icons @@ -230,9 +230,7 @@ def list_item_connections( (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) item_type = item_type[0].upper() + item_type[1:] - item_id = fabric.resolve_item_id( - item_name=item_name, type=item_type, workspace=workspace_id - ) + item_id = resolve_item_id(item=item_name, type=item_type, workspace=workspace_id) columns = { "Connection Name": "string", diff --git a/src/sempy_labs/_dashboards.py b/src/sempy_labs/_dashboards.py new file mode 100644 index 00000000..0bdfd949 --- /dev/null +++ b/src/sempy_labs/_dashboards.py @@ -0,0 +1,60 @@ +from typing import Optional +from uuid import UUID +import pandas as pd +from sempy_labs._helper_functions import ( + _create_dataframe, + _base_api, + resolve_workspace_name_and_id, + _update_dataframe_datatypes, +) + + +def list_dashboards(workspace: Optional[str | UUID] = None) -> pd.DataFrame: + """ + Shows a list of the dashboards within a workspace. + + Parameters + ---------- + workspace : str | uuid.UUID, default=None + The Fabric workspace name or ID. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + + Returns + ------- + pandas.DataFrame + A pandas dataframe showing the dashboards within a workspace. 
+ """ + + columns = { + "Dashboard ID": "string", + "Dashboard Name": "string", + "Read Only": "bool", + "Web URL": "string", + "Embed URL": "string", + "Data Classification": "string", + "Users": "string", + "Subscriptions": "string", + } + df = _create_dataframe(columns=columns) + + (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) + + response = _base_api(request=f"/v1.0/myorg/groups/{workspace_id}/dashboards") + + for v in response.json().get("value", []): + new_data = { + "Dashboard ID": v.get("id"), + "Dashboard Name": v.get("displayName"), + "Read Only": v.get("isReadOnly"), + "Web URL": v.get("webUrl"), + "Embed URL": v.get("embedUrl"), + "Data Classification": v.get("dataClassification"), + "Users": v.get("users"), + "Subscriptions": v.get("subscriptions"), + } + df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + + _update_dataframe_datatypes(dataframe=df, column_map=columns) + + return df diff --git a/src/sempy_labs/_data_pipelines.py b/src/sempy_labs/_data_pipelines.py index cc6b47a7..b5dfe500 100644 --- a/src/sempy_labs/_data_pipelines.py +++ b/src/sempy_labs/_data_pipelines.py @@ -1,13 +1,13 @@ -import sempy.fabric as fabric import pandas as pd from typing import Optional from sempy_labs._helper_functions import ( resolve_workspace_name_and_id, _decode_b64, _base_api, - _print_success, resolve_item_id, _create_dataframe, + delete_item, + create_item, ) from uuid import UUID @@ -76,25 +76,8 @@ def create_data_pipeline( or if no lakehouse attached, resolves to the workspace of the notebook. """ - (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) - - payload = {"displayName": name} - - if description: - payload["description"] = description - - _base_api( - request=f"/v1/workspaces/{workspace_id}/dataPipelines", - method="post", - payload=payload, - status_codes=[201, 202], - lro_return_status_code=True, - ) - _print_success( - item_name=name, - item_type="data pipeline", - workspace_name=workspace_name, - action="created", + create_item( + name=name, description=description, type="DataPipeline", workspace=workspace ) @@ -114,16 +97,7 @@ def delete_data_pipeline(name: str | UUID, workspace: Optional[str | UUID] = Non or if no lakehouse attached, resolves to the workspace of the notebook. 
""" - (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) - item_id = resolve_item_id(item=name, type="DataPipeline", workspace=workspace) - - fabric.delete_item(item_id=item_id, workspace=workspace) - _print_success( - item_name=name, - item_type="data pipeline", - workspace_name=workspace_name, - action="deleted", - ) + delete_item(item=name, type="DataPipeline", workspace=workspace) def get_data_pipeline_definition( diff --git a/src/sempy_labs/_dataflows.py b/src/sempy_labs/_dataflows.py index 353dd310..00ca06f3 100644 --- a/src/sempy_labs/_dataflows.py +++ b/src/sempy_labs/_dataflows.py @@ -1,4 +1,3 @@ -import sempy.fabric as fabric import pandas as pd from sempy_labs._helper_functions import ( resolve_workspace_name_and_id, @@ -6,6 +5,7 @@ _update_dataframe_datatypes, _base_api, _create_dataframe, + resolve_workspace_name, ) from typing import Optional, Tuple import sempy_labs._icons as icons @@ -187,7 +187,7 @@ def collect_upstreams(dataflow_id, dataflow_name, workspace_id, workspace_name): for v in values: tgt_dataflow_id = v.get("targetDataflowId") tgt_workspace_id = v.get("groupId") - tgt_workspace_name = fabric.resolve_workspace_name(tgt_workspace_id) + tgt_workspace_name = resolve_workspace_name(workspace_id=tgt_workspace_id) (tgt_dataflow_name, _) = _resolve_dataflow_name_and_id( dataflow=tgt_dataflow_id, workspace=tgt_workspace_id ) diff --git a/src/sempy_labs/_dax.py b/src/sempy_labs/_dax.py index c99f8d73..18559cd5 100644 --- a/src/sempy_labs/_dax.py +++ b/src/sempy_labs/_dax.py @@ -62,9 +62,23 @@ def evaluate_dax_impersonation( payload=payload, ) data = response.json()["results"][0]["tables"] - column_names = data[0]["rows"][0].keys() - data_rows = [row.values() for item in data for row in item["rows"]] - df = pd.DataFrame(data_rows, columns=column_names) + + # Get all possible column names from all rows because null columns aren't returned + all_columns = set() + for item in data: + for row in item["rows"]: + all_columns.update(row.keys()) + + # Create rows with all columns, filling missing values with None + rows = [] + for item in data: + for row in item["rows"]: + # Create a new row with all columns, defaulting to None + new_row = {col: row.get(col) for col in all_columns} + rows.append(new_row) + + # Create DataFrame from the processed rows + df = pd.DataFrame(rows) return df @@ -192,9 +206,15 @@ def get_dax_query_dependencies( ].reset_index(drop=True) if put_in_memory: - not_in_memory = dfC_filtered[dfC_filtered["Is Resident"] == False] + # Only put columns in memory if they are in a Direct Lake table (and are not already in memory) + dfP = fabric.list_partitions(dataset=dataset, workspace=workspace) + dl_tables = dfP[dfP["Mode"] == "DirectLake"]["Table Name"].unique().tolist() + not_in_memory = dfC_filtered[ + (dfC_filtered["Table Name"].isin(dl_tables)) + & (dfC_filtered["Is Resident"] == False) + ] - if len(not_in_memory) > 0: + if not not_in_memory.empty: _put_columns_into_memory( dataset=dataset, workspace=workspace, diff --git a/src/sempy_labs/_dax_query_view.py b/src/sempy_labs/_dax_query_view.py new file mode 100644 index 00000000..c0a3e037 --- /dev/null +++ b/src/sempy_labs/_dax_query_view.py @@ -0,0 +1,57 @@ +from typing import Optional +from uuid import UUID +from sempy_labs._helper_functions import ( + resolve_dataset_id, + _get_fabric_context_setting, + resolve_workspace_id, +) +from sempy._utils._log import log +import gzip +import base64 +import urllib.parse + + +@log +def generate_dax_query_view_url( + dataset: str | UUID, 
dax_string: str, workspace: Optional[str | UUID] = None +): + """ + Prints a URL based on query provided. This URL opens `DAX query view `_ in the Power BI service, connected to the semantic model and using the query provided. + + Parameters + ---------- + dataset : str | uuid.UUID + The semantic model name or ID. + dax_string : str + The DAX query string. + workspace : str | uuid.UUID, default=None + The workspace name or ID. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + """ + + workspace_id = resolve_workspace_id(workspace=workspace) + dataset_id = resolve_dataset_id(dataset=dataset, workspace=workspace_id) + + prefix = _get_fabric_context_setting(name="spark.trident.pbienv").lower() + + if prefix == "prod": + prefix = "app" + + def gzip_base64_urlsafe(input_string): + # Compress the string with gzip + compressed_data = gzip.compress(input_string.encode("utf-8")) + + # Encode the compressed data in base64 + base64_data = base64.b64encode(compressed_data) + + # Make the base64 string URL-safe + urlsafe_data = urllib.parse.quote_plus(base64_data.decode("utf-8")) + + return urlsafe_data + + formatted_query = gzip_base64_urlsafe(dax_string) + + url = f"https://{prefix}.powerbi.com/groups/{workspace_id}/modeling/{dataset_id}/daxQueryView?query={formatted_query}" + + print(url) diff --git a/src/sempy_labs/_daxformatter.py b/src/sempy_labs/_daxformatter.py new file mode 100644 index 00000000..0b3ceff5 --- /dev/null +++ b/src/sempy_labs/_daxformatter.py @@ -0,0 +1,78 @@ +import requests +from typing import List, Optional +from sempy_labs._a_lib_info import lib_name, lib_version + + +def _format_dax( + expressions: str | List[str], + skip_space_after_function_name: bool = False, + metadata: Optional[List[dict]] = None, +) -> List[str]: + + if isinstance(expressions, str): + expressions = [expressions] + metadata = [metadata] if metadata else [{}] + + # Add variable assignment to each expression + expressions = [f"x :={item}" for item in expressions] + + url = "https://daxformatter.azurewebsites.net/api/daxformatter/daxtextformatmulti" + + payload = { + "Dax": expressions, + "MaxLineLength": 0, + "SkipSpaceAfterFunctionName": skip_space_after_function_name, + "ListSeparator": ",", + "DecimalSeparator": ".", + } + + headers = { + "Accept": "application/json, text/javascript, */*; q=0.01", + "Accept-Encoding": "gzip,deflate", + "Accept-Language": "en-US,en;q=0.8", + "Content-Type": "application/json; charset=UTF-8", + "Host": "daxformatter.azurewebsites.net", + "Expect": "100-continue", + "Connection": "Keep-Alive", + "CallerApp": lib_name, + "CallerVersion": lib_version, + } + + response = requests.post(url, json=payload, headers=headers) + result = [] + for idx, dax in enumerate(response.json()): + formatted_dax = dax.get("formatted") + errors = dax.get("errors") + if errors: + meta = metadata[idx] if metadata and idx < len(metadata) else {} + obj_name = meta.get("name", "Unknown") + table_name = meta.get("table", "Unknown") + obj_type = meta.get("type", "Unknown") + if obj_type == "calculated_tables": + raise ValueError( + f"DAX formatting failed for the '{obj_name}' calculated table: {errors}" + ) + elif obj_type == "calculated_columns": + raise ValueError( + f"DAX formatting failed for the '{table_name}'[{obj_name}] calculated column: {errors}" + ) + elif obj_type == "calculation_items": + raise ValueError( + f"DAX formatting failed for the '{table_name}'[{obj_name}] calculation item: 
{errors}" + ) + elif obj_type == "measures": + raise ValueError( + f"DAX formatting failed for the '{obj_name}' measure: {errors}" + ) + elif obj_type == "rls": + raise ValueError( + f"DAX formatting failed for the row level security expression on the '{table_name}' table within the '{obj_name}' role: {errors}" + ) + else: + NotImplementedError() + else: + if formatted_dax.startswith("x :="): + formatted_dax = formatted_dax[4:] + formatted_dax = formatted_dax.strip() + result.append(formatted_dax) + return result diff --git a/src/sempy_labs/_delta_analyzer.py b/src/sempy_labs/_delta_analyzer.py index a1a305fc..4184e8ea 100644 --- a/src/sempy_labs/_delta_analyzer.py +++ b/src/sempy_labs/_delta_analyzer.py @@ -1,7 +1,9 @@ import pandas as pd -import datetime +import re +from datetime import datetime +import os +from uuid import UUID from typing import Dict, Optional -import pyarrow.dataset as ds import pyarrow.parquet as pq from sempy_labs._helper_functions import ( create_abfss_path, @@ -12,21 +14,52 @@ resolve_workspace_name_and_id, resolve_lakehouse_name_and_id, _read_delta_table, - _delta_table_row_count, _mount, + _read_delta_table_history, + resolve_workspace_id, + resolve_lakehouse_id, + _get_delta_table, ) +from sempy._utils._log import log from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables -from sempy_labs.lakehouse._lakehouse import lakehouse_attached +from sempy_labs.lakehouse._lakehouse import ( + lakehouse_attached, +) +from sempy_labs.lakehouse._helper import ( + is_v_ordered, +) import sempy_labs._icons as icons -from uuid import UUID +from tqdm.auto import tqdm + + +def get_parquet_file_infos(path): + + import notebookutils + + files = [] + items = notebookutils.fs.ls(path) + for item in items: + if item.isDir: + # Ignore the _delta_log directory + if "_delta_log" not in item.path: + files.extend(get_parquet_file_infos(item.path)) + else: + # Filter out non-Parquet files and files with size 0 + if item.path.endswith(".parquet") and item.size > 0: + files.append((item.path, item.size)) + return files +@log def delta_analyzer( table_name: str, approx_distinct_count: bool = True, export: bool = False, lakehouse: Optional[str | UUID] = None, workspace: Optional[str | UUID] = None, + column_stats: bool = True, + skip_cardinality: bool = True, + schema: Optional[str] = None, ) -> Dict[str, pd.DataFrame]: """ Analyzes a delta table and shows the results in dictionary containing a set of 5 dataframes. If 'export' is set to True, the results will be saved to delta tables in the lakehouse attached to the notebook. @@ -56,94 +89,141 @@ def delta_analyzer( The Fabric workspace name or ID used by the lakehouse. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. + column_stats : bool, default=True + If True, collects data about column chunks and columns. If False, skips that step and only returns the other 3 dataframes. + skip_cardinality : bool, default=True + If True, skips the cardinality calculation for each column. If False, calculates the cardinality for each column. + schema : str, default=None + The name of the schema to which the table belongs (for schema-enabled lakehouses). If None, the default schema is used. Returns ------- Dict[str, pandas.DataFrame] A dictionary of pandas dataframes showing semantic model objects which violated the best practice analyzer rules. 
""" - import notebookutils - - # display_toggle = notebookutils.common.configs.pandas_display - # Turn off notebookutils display - # if display_toggle is True: - # notebookutils.common.configs.pandas_display = False + # Must calculate column stats if calculating cardinality + if not skip_cardinality: + column_stats = True prefix = "SLL_DeltaAnalyzer_" - now = datetime.datetime.now() + now = datetime.now() (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace=workspace) (lakehouse_name, lakehouse_id) = resolve_lakehouse_name_and_id( lakehouse=lakehouse, workspace=workspace ) - path = create_abfss_path(lakehouse_id, workspace_id, table_name) - local_path = _mount(lakehouse=lakehouse, workspace=workspace) - table_path = f"{local_path}/Tables/{table_name}" - # Set back to original value - # notebookutils.common.configs.pandas_display = display_toggle + delta_table_path = create_abfss_path( + lakehouse_id, workspace_id, table_name, schema=schema + ) + local_path = _mount(lakehouse=lakehouse, workspace=workspace) parquet_file_df_columns = { - "ParquetFile": "string", - "RowCount": "int", - "RowGroups": "int", + # "Dataset": "string", + "Parquet File": "string", + "Row Count": "int", + "Row Groups": "int", + "Created By": "string", + "Total Table Rows": "int", + "Total Table Row Groups": "int", } row_group_df_columns = { - "ParquetFile": "string", - "RowGroupID": "int", - "RowCount": "int", - "CompressedSize": "int", - "UncompressedSize": "int", - "CompressionRatio": "float", + # "Dataset": "string", + "Parquet File": "string", + "Row Group ID": "int", + "Row Count": "int", + "Compressed Size": "int", + "Uncompressed Size": "int", + "Compression Ratio": "float", + "Total Table Rows": "int", + "Ratio Of Total Table Rows": "float", + "Total Table Row Groups": "int", } column_chunk_df_columns = { - "ParquetFile": "string", - "ColumnID": "int", - "ColumnName": "string", - "ColumnType": "string", - "CompressedSize": "int", - "UncompressedSize": "int", - "HasDict": "bool", - "DictOffset": "int_fillna", - "ValueCount": "int", + # "Dataset": "string", + "Parquet File": "string", + "Column ID": "int", + "Column Name": "string", + "Column Type": "string", + "Compressed Size": "int", + "Uncompressed Size": "int", + "Has Dict": "bool", + "Dict Offset": "int_fillna", + "Value Count": "int", "Encodings": "string", + "Statistics": "string", + "Primative Type": "string", } parquet_file_df = _create_dataframe(columns=parquet_file_df_columns) row_group_df = _create_dataframe(columns=row_group_df_columns) column_chunk_df = _create_dataframe(columns=column_chunk_df_columns) - # delta_table = DeltaTable.forPath(spark, path) - # detail_df = spark.sql(f"DESCRIBE DETAIL `{table_name}`").collect()[0] - - # num_files = detail_df.numFiles - # size_in_bytes = detail_df.sizeInBytes - - latest_files = _read_delta_table(path).inputFiles() - file_paths = [f.split("/")[-1] for f in latest_files] - row_count = _delta_table_row_count(table_name) row_groups = 0 max_rows_per_row_group = 0 min_rows_per_row_group = float("inf") - schema = ds.dataset(table_path).schema.metadata - is_vorder = any(b"vorder" in key for key in schema.keys()) + is_vorder = is_v_ordered( + table_name=table_name, lakehouse=lakehouse, workspace=workspace, schema=schema + ) + + # Get the common details of the Delta table + delta_table = _get_delta_table(delta_table_path) + table_df = delta_table.toDF() + # total_partition_count = table_df.rdd.getNumPartitions() + row_count = table_df.count() + table_details = 
delta_table.detail().collect()[0].asDict() + # created_at = table_details.get("createdAt") + # last_modified = table_details.get("lastModified") + # partition_columns = table_details.get("partitionColumns") + # clustering_columns = table_details.get("clusteringColumns") + num_latest_files = table_details.get("numFiles", 0) + # size_in_bytes = table_details.get("sizeInBytes") + # min_reader_version = table_details.get("minReaderVersion") + # min_writer_version = table_details.get("minWriterVersion") + + latest_files = _read_delta_table(delta_table_path).inputFiles() + # file_paths = [f.split("/")[-1] for f in latest_files] + all_parquet_files = get_parquet_file_infos(delta_table_path) + common_file_paths = set( + [file_info[0] for file_info in all_parquet_files] + ).intersection(set(latest_files)) + latest_version_files = [ + file_info + for file_info in all_parquet_files + if file_info[0] in common_file_paths + ] + + for idx, (file_path, file_size) in enumerate( + bar := tqdm(latest_version_files), start=1 + ): + file_name = os.path.basename(file_path) + bar.set_description( + f"Analyzing the '{file_name}' parquet file ({idx}/{num_latest_files})..." + ) + + relative_path = file_path.split("Tables/")[1] + file_system_path = f"{local_path}/Tables/{relative_path}" + parquet_file = pq.ParquetFile(file_system_path) - for file_name in file_paths: - parquet_file = pq.ParquetFile(f"{table_path}/{file_name}") row_groups += parquet_file.num_row_groups # Generate rowgroup dataframe new_data = { - "ParquetFile": file_name, - "RowCount": parquet_file.metadata.num_rows, - "RowGroups": parquet_file.num_row_groups, + # "Dataset": "Parquet Files", + "Parquet File": file_name, + "Row Count": parquet_file.metadata.num_rows, + "Row Groups": parquet_file.num_row_groups, + "Created By": parquet_file.metadata.created_by, + "Total Table Rows": -1, + "Total Table Row Groups": -1, } parquet_file_df = pd.concat( [parquet_file_df, pd.DataFrame(new_data, index=[0])], ignore_index=True ) + # Loop through the row groups for i in range(parquet_file.num_row_groups): row_group = parquet_file.metadata.row_group(i) num_rows = row_group.num_rows @@ -154,38 +234,50 @@ def delta_analyzer( total_compressed_size = 0 total_uncompressed_size = 0 - for j in range(row_group.num_columns): - column_chunk = row_group.column(j) - total_compressed_size += column_chunk.total_compressed_size - total_uncompressed_size += column_chunk.total_uncompressed_size - - # Generate Column Chunk Dataframe - new_data = { - "ParquetFile": file_name, - "ColumnID": j, - "ColumnName": column_chunk.path_in_schema, - "ColumnType": column_chunk.physical_type, - "CompressedSize": column_chunk.total_compressed_size, - "UncompressedSize": column_chunk.total_uncompressed_size, - "HasDict": column_chunk.has_dictionary_page, - "DictOffset": column_chunk.dictionary_page_offset, - "ValueCount": column_chunk.num_values, - "Encodings": str(column_chunk.encodings), - } - - column_chunk_df = pd.concat( - [column_chunk_df, pd.DataFrame(new_data, index=[0])], - ignore_index=True, - ) + # Loop through the columns + if column_stats: + for j in range(row_group.num_columns): + column_chunk = row_group.column(j) + total_compressed_size += column_chunk.total_compressed_size + total_uncompressed_size += column_chunk.total_uncompressed_size + + # Generate Column Chunk Dataframe + new_data = { + # "Dataset": "Column Chunks", + "Parquet File": file_name, + "Column ID": j, + "Column Name": column_chunk.path_in_schema, + "Column Type": column_chunk.physical_type, + "Compressed Size": 
column_chunk.total_compressed_size, + "Uncompressed Size": column_chunk.total_uncompressed_size, + "Has Dict": column_chunk.has_dictionary_page, + "Dict Offset": column_chunk.dictionary_page_offset, + "Value Count": column_chunk.num_values, + "Encodings": str(column_chunk.encodings), + "Statistics": column_chunk.statistics, + "PrimativeType": column_chunk.physical_type, + } + + column_chunk_df = pd.concat( + [column_chunk_df, pd.DataFrame(new_data, index=[0])], + ignore_index=True, + ) # Generate rowgroup dataframe new_data = { - "ParquetFile": file_name, - "RowGroupID": i + 1, - "RowCount": num_rows, - "CompressedSize": total_compressed_size, - "UncompressedSize": total_uncompressed_size, - "CompressionRatio": total_compressed_size / total_uncompressed_size, + # "Dataset": "Row Groups", + "Parquet File": file_name, + "Row Group ID": i + 1, + "Row Count": num_rows, + "Compressed Size": total_compressed_size, + "Uncompressed Size": total_uncompressed_size, + "Compression Ratio": ( + total_compressed_size / total_uncompressed_size + if column_stats + else 0 + ), + "Total Table Rows": -1, + "Total Table Row Groups": -1, } if not row_group_df.empty: @@ -201,68 +293,89 @@ def delta_analyzer( summary_df = pd.DataFrame( [ { - "RowCount": row_count, - "RowGroups": row_groups, - "ParquetFiles": len(file_paths), - "MaxRowsPerRowGroup": max_rows_per_row_group, - "MinRowsPerRowGroup": min_rows_per_row_group, - "AvgRowsPerRowGroup": avg_rows_per_row_group, - "VOrderEnabled": is_vorder, + # "Dataset": "Summary", + "Row Count": row_count, + "Row Groups": row_groups, + "Parquet Files": num_latest_files, + "Max Rows Per Row Group": max_rows_per_row_group, + "Min Rows Per Row Group": min_rows_per_row_group, + "Avg Rows Per Row Group": avg_rows_per_row_group, + "VOrder Enabled": is_vorder, # "VOrderLevel": v_order_level, } ] ) # Clean up data types - _update_dataframe_datatypes( - dataframe=column_chunk_df, column_map=column_chunk_df_columns - ) _update_dataframe_datatypes(dataframe=row_group_df, column_map=row_group_df_columns) _update_dataframe_datatypes( dataframe=parquet_file_df, column_map=parquet_file_df_columns ) # Generate column dataframe - column_df = column_chunk_df.groupby( - ["ColumnName", "ColumnType"], as_index=False - ).agg({"CompressedSize": "sum", "UncompressedSize": "sum"}) - - # Add distinct count to column_df - for ind, r in column_df.iterrows(): - col_name = r["ColumnName"] - if approx_distinct_count: - dc = _get_column_aggregate( - table_name=table_name, - column_name=col_name, - function="approx", - lakehouse=lakehouse, - workspace=workspace, - ) - else: - dc = _get_column_aggregate( - table_name=table_name, - column_name=col_name, - function="distinctcount", - lakehouse=lakehouse, - workspace=workspace, - ) + if column_stats: + _update_dataframe_datatypes( + dataframe=column_chunk_df, column_map=column_chunk_df_columns + ) + column_df = column_chunk_df.groupby( + ["Column Name", "Column Type"], as_index=False + ).agg({"Compressed Size": "sum", "Uncompressed Size": "sum"}) + + # Add distinct count to column_df + if not skip_cardinality: + for ind, r in column_df.iterrows(): + col_name = r["Column Name"] + if approx_distinct_count: + function = "approx" + else: + function = "distinctcount" + dc = _get_column_aggregate( + table_name=table_name, + column_name=col_name, + function=function, + lakehouse=lakehouse, + workspace=workspace, + ) + + if "Cardinality" not in column_df.columns: + column_df["Cardinality"] = None + + column_df.at[ind, "Cardinality"] = dc - if "Cardinality" not in 
column_df.columns: - column_df["Cardinality"] = None + summary_df["Total Size"] = column_df["Compressed Size"].sum() - column_df.at[ind, "Cardinality"] = dc + parquet_file_df["Total Table Rows"] = parquet_file_df["Row Count"].sum() + parquet_file_df["Total Table Row Groups"] = parquet_file_df["Row Groups"].sum() - column_df["Cardinality"] = column_df["Cardinality"].astype(int) - summary_df["TotalSize"] = column_df["CompressedSize"].sum() + row_group_df["Total Table Rows"] = parquet_file_df["Row Count"].sum() + row_group_df["Total Table Row Groups"] = parquet_file_df["Row Groups"].sum() + total_rows = row_group_df["Row Count"].sum() + row_group_df["Ratio Of Total Table Rows"] = ( + row_group_df["Row Count"] / total_rows * 100.0 + ) + + if column_stats: + column_df["Total Table Rows"] = parquet_file_df["Row Count"].sum() + column_df["Table Size"] = column_df["Compressed Size"].sum() + column_df["Size Percent Of Table"] = ( + column_df["Compressed Size"] / column_df["Table Size"] * 100.0 + ) + if not skip_cardinality and column_stats: + column_df["Cardinality"] = column_df["Cardinality"].fillna(0).astype(int) + column_df["Cardinality Of Total Rows"] = ( + column_df["Cardinality"] / column_df["Total Table Rows"] * 100.0 + ) dataframes = { "Summary": summary_df, "Parquet Files": parquet_file_df, "Row Groups": row_group_df, - "Column Chunks": column_chunk_df, - "Columns": column_df, } + if column_stats: + dataframes["Column Chunks"] = column_chunk_df + dataframes["Columns"] = column_df + save_table = f"{prefix}Summary" if export: @@ -283,11 +396,11 @@ def delta_analyzer( for name, df in dataframes.items(): name = name.replace(" ", "") cols = { - "WorkspaceName": workspace_name, - "WorkspaceId": workspace_id, - "LakehouseName": lakehouse_name, - "LakehouseId": lakehouse_id, - "TableName": table_name, + "Workspace Name": workspace_name, + "Workspace Id": workspace_id, + "Lakehouse Name": lakehouse_name, + "Lakehouse Id": lakehouse_id, + "Table Name": table_name, } for i, (col, param) in enumerate(cols.items()): df[col] = param @@ -297,8 +410,10 @@ def delta_analyzer( df["Timestamp"] = pd.to_datetime(df["Timestamp"]) if export: - df["RunId"] = runId - df["RunId"] = df["RunId"].astype(int) + df["Run Id"] = runId + df["Run Id"] = df["Run Id"].astype(int) + + df.columns = df.columns.str.replace(" ", "") save_as_delta_table( dataframe=df, delta_table_name=f"{prefix}{name}", @@ -307,3 +422,45 @@ def delta_analyzer( ) return dataframes + + +@log +def get_delta_table_history( + table_name: str, + lakehouse: Optional[str | UUID] = None, + workspace: Optional[str | UUID] = None, + schema: Optional[str] = None, +) -> pd.DataFrame: + """ + Returns the history of a delta table as a pandas dataframe. + + Parameters + ---------- + table_name : str + The delta table name. + lakehouse : str | uuid.UUID, default=None + The Fabric lakehouse name or ID. + Defaults to None which resolves to the lakehouse attached to the notebook. + workspace : str | uuid.UUID, default=None + The Fabric workspace name or ID used by the lakehouse. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + schema : str, default=None + The name of the schema to which the table belongs (for schema-enabled lakehouses). If None, the default schema is used. + + Returns + ------- + pandas.DataFrame + A dataframe showing the history of the delta table. 
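A short usage sketch for get_delta_table_history (placeholder names): the camelCase history columns are renamed to title case by the helper in the body below, e.g. operationParameters becomes Operation Parameters.

hist = get_delta_table_history(
    table_name="fact_sales",
    lakehouse="MyLakehouse",
    workspace="MyWorkspace",
)
# Typical Delta history columns after renaming: Version, Timestamp, Operation, Operation Parameters
hist.head()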
+ """ + + def camel_to_title(text): + return re.sub(r"([a-z])([A-Z])", r"\1 \2", text).title() + + workspace_id = resolve_workspace_id(workspace=workspace) + lakehouse_id = resolve_lakehouse_id(lakehouse=lakehouse, workspace=workspace_id) + path = create_abfss_path(lakehouse_id, workspace_id, table_name, schema) + df = _read_delta_table_history(path=path) + df.rename(columns=lambda col: camel_to_title(col), inplace=True) + + return df diff --git a/src/sempy_labs/_delta_analyzer_history.py b/src/sempy_labs/_delta_analyzer_history.py new file mode 100644 index 00000000..421c91e7 --- /dev/null +++ b/src/sempy_labs/_delta_analyzer_history.py @@ -0,0 +1,298 @@ +import pandas as pd +from typing import Optional +import pyarrow.parquet as pq +from sempy_labs._helper_functions import ( + create_abfss_path, + resolve_workspace_id, + resolve_lakehouse_id, + _mount, +) +from sempy._utils._log import log +from tqdm.auto import tqdm +from uuid import UUID +from datetime import datetime + + +@log +def delta_analyzer_history( + table_name: str, + schema: Optional[str] = None, + lakehouse: Optional[str | UUID] = None, + workspace: Optional[str | UUID] = None, +) -> pd.DataFrame: + """ + Analyzes the transaction log for a specified delta table and shows the results in dataframe. One row per data modification operation. + + Keeps track on the number of Parquet files, rowgroups, file size and #rows impacted by each change. + + Incremental Framing effect: 100% = highly effective, 0% = no benefit at all + + Parameters + ---------- + table_name : str + The delta table name. + schema : str, default=None + The schema name of the delta table. + lakehouse : str | uuid.UUID, default=None + The Fabric lakehouse name or ID. + Defaults to None which resolves to the lakehouse attached to the notebook. + workspace : str | uuid.UUID, default=None + The Fabric workspace name or ID used by the lakehouse. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + + Returns + ------- + pandas.DataFrame + Displays a gantt visual showing a timeline for individual parquet files. + """ + + import notebookutils + from IPython.display import display, HTML + + workspace_id = resolve_workspace_id(workspace=workspace) + lakehouse_id = resolve_lakehouse_id(lakehouse=lakehouse, workspace=workspace) + + table_path = create_abfss_path(lakehouse_id, workspace_id, table_name, schema) + local_path = _mount(lakehouse=lakehouse, workspace=workspace) + table_path_local = f"{local_path}/Tables/{table_name}" + delta_table_path = f"{table_path}/_delta_log" + + files = notebookutils.fs.ls(delta_table_path) + json_files = [file.name for file in files if file.name.endswith(".json")] + + element_version = total_size = total_rows = total_files = total_rowgroups = 0 + changes_array = [] + parquet_files = [] + my_date_time_format = "%Y-%m-%d %H:%M:%S.%f" + now_to_epoch = datetime.now().strftime(my_date_time_format) + num_latest_files = len(json_files) + + for idx, file in enumerate(bar := tqdm(json_files), start=1): + bar.set_description( + f"Analyzing the '{file}' parquet file ({idx}/{num_latest_files})..." 
+ ) + + change_timestamp = datetime.strptime( + "2001-01-01 12:00:00.000", my_date_time_format + ) + df = pd.read_json(f"{delta_table_path}/{file}", lines=True) + + rows_added = size_added = rows_deleted = size_deleted = files_added = ( + files_removed + ) = row_groups_added = row_groups_removed = 0 + total_files_before_change = total_files + total_row_groups_before_change = total_rowgroups + operation = predicate = tags = "" + + for _, row in df.iterrows(): + add_row = row.get("add") + remove_row = row.get("remove") + commit_row = row.get("commitInfo") + + if isinstance(add_row, dict): + file_name = add_row["path"] + fs_filename = f"{table_path}/{file_name}" + size_added += add_row["size"] + files_added += 1 + filerows_added = 0 + + if notebookutils.fs.exists(fs_filename): + parquet_file = pq.ParquetFile(table_path_local + f"/{file_name}") + for i in range(parquet_file.num_row_groups): + row_group = parquet_file.metadata.row_group(i) + num_rows = row_group.num_rows + filerows_added += num_rows + rows_added += num_rows + + row_groups_added += parquet_file.num_row_groups + + start = str( + datetime.fromtimestamp(add_row["modificationTime"] / 1000.0) + ) + parquet_files.append( + { + "file": file_name, + "start": start, + "end": now_to_epoch, + "rows": filerows_added, + "isCurrent": 1, + } + ) + + if isinstance(remove_row, dict): + file_name = remove_row["path"] + fs_filename = f"{table_path}/{file_name}" + + if notebookutils.fs.exists(fs_filename): + parquet_file = pq.ParquetFile(table_path_local + f"/{file_name}") + for i in range(parquet_file.num_row_groups): + row_group = parquet_file.metadata.row_group(i) + num_rows = row_group.num_rows + rows_deleted += num_rows + + files_removed += 1 + size_deleted += remove_row.get("size", 0) + row_groups_removed += parquet_file.num_row_groups + + result = next( + (row for row in parquet_files if row["file"] == file_name), None + ) + if result: + result.update( + { + "isCurrent": 0, + "end": str( + datetime.fromtimestamp( + remove_row["deletionTimestamp"] / 1000.0 + ) + ), + } + ) + + if isinstance(commit_row, dict): + operation = commit_row.get("operation") + tags = commit_row.get("tags") + predicate = commit_row.get("operationParameters", {}).get("predicate") + + if operation == "VACUUM START": + operation_metrics = commit_row.get("operationMetrics", {}) + total_files -= int(operation_metrics.get("numFilesToDelete", 0)) + total_size -= int(operation_metrics.get("sizeOfDataToDelete", 0)) + + change_timestamp = datetime.fromtimestamp( + commit_row["timestamp"] / 1000.0 + ) + + total_size += size_added - size_deleted + total_rows += rows_added - rows_deleted + total_files += files_added - files_removed + total_rowgroups += row_groups_added - row_groups_removed + + incremental_framing_effect = 1 + if size_deleted != 0: + incremental_framing_effect = ( + int((total_size - size_added * 1.0) / total_size * 100000) / 1000 + ) + # incrementalFramingEffect = round( + # (totalSize - sizeAdded * 1.0) / totalSize, 4 + # ) + + changes_array.append( + [ + element_version, + operation, + predicate, + change_timestamp, + incremental_framing_effect, + files_added, + files_removed, + total_files_before_change - files_removed, + total_files, + size_added, + size_deleted, + total_size, + row_groups_added, + row_groups_removed, + total_row_groups_before_change - row_groups_removed, + total_rowgroups, + rows_added, + rows_deleted, + rows_added - rows_deleted, + total_rows, + tags, + ] + ) + + element_version += 1 + + # 
/******************************************************************************************************************** + # Display Gantt Chart of files + # ********************************************************************************************************************/ + spec: str = ( + """{ + "$$schema": 'https://vega.github.io/schema/vega-lite/v2.json', + "description": "A simple bar chart with ranged data (aka Gantt Chart).", + "width" : 1024 , + "data": { + "values": %s + }, + "layer":[ + {"mark": "bar"}, + {"mark": { + "type": "text", + "align": "center", + "baseline": "middle", + "dx": 40 + }, + "encoding": { + "text": {"field": "rows", "type": "quantitative", "format":","}, + "color":{ + "condition": {"test": "datum['isCurrent'] == 1", "value": "black"}, + "value": "black" + } + } + }], + "encoding": { + "y": {"field": "file", "type": "ordinal","sort": "isCurrent","title":null,"axis":{"labelPadding":15,"labelLimit":360}}, + "x": {"field": "start", "type": "temporal","title":null}, + "x2": {"field": "end", "type": "temporal","title":null}, + "color": { + "field": "isCurrent", + "scale": {"range": ["silver", "#ca8861"]} + } + } + }""" + % (parquet_files) + ) + + display( + HTML( + """ + + + + + + + + +
+ + + """ + ) + ) + + return pd.DataFrame( + changes_array, + columns=[ + "Change Number", + "Change Type", + "Predicate", + "Modification Time", + "Incremental Effect", + "Files Added", + "Files Removed", + "Files Preserved", + "Files After Change", + "Size Added", + "Sized Removed", + "Size After Change", + "Rowgroups Added", + "Rowgroups Removed", + "Rowgroups Preserved", + "Rowgroups After Change", + "Rows Added", + "Rows Removed", + "Rows Delta", + "Rows After Change", + "Tags", + ], + ) diff --git a/src/sempy_labs/_dictionary_diffs.py b/src/sempy_labs/_dictionary_diffs.py new file mode 100644 index 00000000..16142571 --- /dev/null +++ b/src/sempy_labs/_dictionary_diffs.py @@ -0,0 +1,221 @@ +import re +import json +import difflib +from collections import defaultdict + + +def color_text(text, color_code): + return f"\033[{color_code}m{text}\033[0m" + + +def stringify(payload): + try: + if isinstance(payload, list): + return ( + "[\n" + ",\n".join(f" {json.dumps(item)}" for item in payload) + "\n]" + ) + return json.dumps(payload, indent=2, sort_keys=True) + except Exception: + return str(payload) + + +def extract_top_level_group(path): + # For something like: resourcePackages[1].items[1].name → resourcePackages[1].items[1] + segments = re.split(r"\.(?![^[]*\])", path) # split on dots not in brackets + return ".".join(segments[:-1]) if len(segments) > 1 else segments[0] + + +def get_by_path(obj, path): + """Navigate into nested dict/list based on a dot/bracket path like: a.b[1].c""" + tokens = re.findall(r"\w+|\[\d+\]", path) + for token in tokens: + if token.startswith("["): + index = int(token[1:-1]) + obj = obj[index] + else: + obj = obj.get(token) + return obj + + +def deep_diff(d1, d2, path=""): + diffs = [] + if isinstance(d1, dict) and isinstance(d2, dict): + keys = set(d1) | set(d2) + for key in sorted(keys): + new_path = f"{path}.{key}" if path else key + if key not in d1: + diffs.append(("+", new_path, None, d2[key])) + elif key not in d2: + diffs.append(("-", new_path, d1[key], None)) + else: + diffs.extend(deep_diff(d1[key], d2[key], new_path)) + elif isinstance(d1, list) and isinstance(d2, list): + min_len = min(len(d1), len(d2)) + list_changed = False + for i in range(min_len): + if d1[i] != d2[i]: + list_changed = True + break + if list_changed or len(d1) != len(d2): + diffs.append(("~", path, d1, d2)) + elif d1 != d2: + diffs.append(("~", path, d1, d2)) + return diffs + + +def diff_parts(d1, d2): + + def build_path_map(parts): + return {part["path"]: part["payload"] for part in parts} + + try: + paths1 = build_path_map(d1) + except Exception: + paths1 = d1 + try: + paths2 = build_path_map(d2) + except Exception: + paths2 = d2 + all_paths = set(paths1) | set(paths2) + + for part_path in sorted(all_paths): + p1 = paths1.get(part_path) + p2 = paths2.get(part_path) + + if p1 is None: + print(color_text(f"+ {part_path}", "32")) # Green + continue + elif p2 is None: + print(color_text(f"- {part_path}", "31")) # Red + continue + elif p1 == p2: + continue + + if p1 is None or p2 is None: + print( + color_text(f"+ {part_path}", "32") + if p2 and not p1 + else color_text(f"- {part_path}", "31") + ) + continue + + # Header for the changed part + print(color_text(f"~ {part_path}", "33")) + + # Collect diffs + diffs = deep_diff(p1, p2) + # If the diff is only a change of a whole list (like appending to a list), group it under its key + merged_list_diffs = [] + for change_type, full_path, old_val, new_val in diffs: + if ( + change_type == "~" + and isinstance(old_val, list) + and 
isinstance(new_val, list) + ): + merged_list_diffs.append((change_type, full_path, old_val, new_val)) + + # Replace individual item diffs with unified list diff + if merged_list_diffs: + diffs = merged_list_diffs + + # Group diffs by common parent path (e.g. items[1]) + grouped = defaultdict(list) + for change_type, full_path, old_val, new_val in diffs: + group_path = extract_top_level_group(full_path) + grouped[group_path].append((change_type, full_path, old_val, new_val)) + + # Print each group once with unified diff for the full substructure + for group_path in sorted(grouped): + print(" " + color_text(f"~ {group_path}", "33")) + + try: + old_group = get_by_path(p1, group_path) + new_group = get_by_path(p2, group_path) + except Exception: + old_group = new_group = None + + # Skip showing diffs for empty/null groups + if isinstance(old_group, dict) and isinstance(new_group, dict): + old_keys = set(old_group.keys()) + new_keys = set(new_group.keys()) + + for key in sorted(old_keys - new_keys): + print( + " " + + color_text(f"- {key}: {json.dumps(old_group[key])}", "31") + ) + for key in sorted(new_keys - old_keys): + print( + " " + + color_text(f"+ {key}: {json.dumps(new_group[key])}", "32") + ) + for key in sorted(old_keys & new_keys): + if old_group[key] != new_group[key]: + print(" " + color_text(f"~ {key}:", "33")) + old_val_str = stringify(old_group[key]).splitlines() + new_val_str = stringify(new_group[key]).splitlines() + for line in difflib.unified_diff( + old_val_str, + new_val_str, + fromfile="old", + tofile="new", + lineterm="", + ): + if line.startswith("@@"): + print(" " + color_text(line, "36")) + elif line.startswith("-") and not line.startswith("---"): + print(" " + color_text(line, "31")) + elif line.startswith("+") and not line.startswith("+++"): + print(" " + color_text(line, "32")) + elif old_group is None and new_group is not None: + if isinstance(new_group, dict): + # print all added keys + for key, val in new_group.items(): + print(" " + color_text(f"+ {key}: {json.dumps(val)}", "32")) + elif isinstance(new_group, list): + old_str = [] + new_str = stringify(new_group).splitlines() + for line in difflib.unified_diff( + old_str, new_str, fromfile="old", tofile="new", lineterm="" + ): + if line.startswith("@@"): + print(" " + color_text(line, "36")) + elif line.startswith("-") and not line.startswith("---"): + print(" " + color_text(line, "31")) + elif line.startswith("+") and not line.startswith("+++"): + print(" " + color_text(line, "32")) + else: + print(" " + color_text(f"+ {json.dumps(new_group)}", "32")) + + elif new_group is None and old_group is not None: + if isinstance(old_group, dict): + # print all removed keys + for key, val in old_group.items(): + print(" " + color_text(f"- {key}: {json.dumps(val)}", "31")) + elif isinstance(old_group, list): + old_str = stringify(old_group).splitlines() + new_str = [] + for line in difflib.unified_diff( + old_str, new_str, fromfile="old", tofile="new", lineterm="" + ): + if line.startswith("@@"): + print(" " + color_text(line, "36")) + elif line.startswith("-") and not line.startswith("---"): + print(" " + color_text(line, "31")) + elif line.startswith("+") and not line.startswith("+++"): + print(" " + color_text(line, "32")) + else: + print(" " + color_text(f"- {json.dumps(old_group)}", "31")) + else: + old_str = stringify(old_group).splitlines() + new_str = stringify(new_group).splitlines() + + for line in difflib.unified_diff( + old_str, new_str, fromfile="old", tofile="new", lineterm="" + ): + if 
line.startswith("@@"): + print(" " + color_text(line, "36")) + elif line.startswith("-") and not line.startswith("---"): + print(" " + color_text(line, "31")) + elif line.startswith("+") and not line.startswith("+++"): + print(" " + color_text(line, "32")) diff --git a/src/sempy_labs/_environments.py b/src/sempy_labs/_environments.py index 566e9165..f6cc1823 100644 --- a/src/sempy_labs/_environments.py +++ b/src/sempy_labs/_environments.py @@ -3,9 +3,12 @@ from typing import Optional from sempy_labs._helper_functions import ( resolve_workspace_name_and_id, + resolve_workspace_id, _base_api, - _print_success, _create_dataframe, + resolve_item_id, + delete_item, + create_item, ) from uuid import UUID @@ -32,25 +35,11 @@ def create_environment( or if no lakehouse attached, resolves to the workspace of the notebook. """ - (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) - - payload = {"displayName": environment} - - if description: - payload["description"] = description - - _base_api( - request="/v1/workspaces/{workspace_id}/environments", - method="post", - payload=payload, - status_codes=[201, 202], - lro_return_status_code=True, - ) - _print_success( - item_name=environment, - item_type="environment", - workspace_name=workspace_name, - action="created", + create_item( + name=environment, + description=description, + type="Environment", + workspace=workspace, ) @@ -60,6 +49,8 @@ def list_environments(workspace: Optional[str | UUID] = None) -> pd.DataFrame: This is a wrapper function for the following API: `Items - List Environments `_. + Service Principal Authentication is supported (see `here `_ for examples). + Parameters ---------- workspace : str | uuid.UUID, default=None @@ -77,28 +68,47 @@ def list_environments(workspace: Optional[str | UUID] = None) -> pd.DataFrame: "Environment Name": "string", "Environment Id": "string", "Description": "string", + "Publish State": "string", + "Publish Target Version": "string", + "Publish Start Time": "string", + "Publish End Time": "string", + "Spark Libraries State": "string", + "Spark Settings State": "string", } df = _create_dataframe(columns=columns) - (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) + workspace_id = resolve_workspace_id(workspace) responses = _base_api( - request=f"/v1/workspaces/{workspace_id}/environments", uses_pagination=True + request=f"/v1/workspaces/{workspace_id}/environments", + uses_pagination=True, + client="fabric_sp", ) for r in responses: for v in r.get("value", []): + pub = v.get("properties", {}).get("publishDetails", {}) new_data = { "Environment Name": v.get("displayName"), "Environment Id": v.get("id"), "Description": v.get("description"), + "Publish State": pub.get("state"), + "Publish Target Version": pub.get("targetVersion"), + "Publish Start Time": pub.get("startTime"), + "Publish End Time": pub.get("endTime"), + "Spark Libraries State": pub.get("componentPublishInfo", {}) + .get("sparkLibraries", {}) + .get("state"), + "Spark Settings State": pub.get("componentPublishInfo", {}) + .get("sparkSettings", {}) + .get("state"), } df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) return df -def delete_environment(environment: str, workspace: Optional[str | UUID] = None): +def delete_environment(environment: str | UUID, workspace: Optional[str | UUID] = None): """ Deletes a Fabric environment. 
@@ -106,61 +116,48 @@ def delete_environment(environment: str, workspace: Optional[str | UUID] = None) Parameters ---------- - environment: str - Name of the environment. + environment: str | uuid.UUID + Name or ID of the environment. workspace : str | uuid.UUID, default=None The Fabric workspace name or ID. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. """ - from sempy_labs._helper_functions import resolve_environment_id + delete_item(item=environment, type="Environment", workspace=workspace) - (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) - environment_id = resolve_environment_id( - environment=environment, workspace=workspace_id - ) - - _base_api( - request=f"/v1/workspaces/{workspace_id}/environments/{environment_id}", - method="delete", - ) - _print_success( - item_name=environment, - item_type="environment", - workspace_name=workspace_name, - action="deleted", - ) - -def publish_environment(environment: str, workspace: Optional[str | UUID] = None): +def publish_environment( + environment: str | UUID, workspace: Optional[str | UUID] = None +): """ Publishes a Fabric environment. This is a wrapper function for the following API: `Spark Libraries - Publish Environment `_. + Service Principal Authentication is supported (see `here `_ for examples). + Parameters ---------- - environment: str - Name of the environment. + environment: str | uuid.UUID + Name or ID of the environment. workspace : str | uuid.UUID, default=None The Fabric workspace name or ID. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. """ - from sempy_labs._helper_functions import resolve_environment_id - (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) - environment_id = resolve_environment_id( - environment=environment, workspace=workspace_id + item_id = resolve_item_id( + item=environment, type="Environment", workspace=workspace_id ) _base_api( - request=f"/v1/workspaces/{workspace_id}/environments/{environment_id}/staging/publish", + request=f"/v1/workspaces/{workspace_id}/environments/{item_id}/staging/publish", method="post", lro_return_status_code=True, status_codes=None, + client="fabric_sp", ) print( diff --git a/src/sempy_labs/_eventhouses.py b/src/sempy_labs/_eventhouses.py index b05a6c4e..a3dbe502 100644 --- a/src/sempy_labs/_eventhouses.py +++ b/src/sempy_labs/_eventhouses.py @@ -1,14 +1,15 @@ -import sempy.fabric as fabric import pandas as pd from typing import Optional from sempy_labs._helper_functions import ( resolve_workspace_name_and_id, _base_api, - _print_success, resolve_item_id, _create_dataframe, _conv_b64, _decode_b64, + delete_item, + create_item, + get_item_definition, ) from uuid import UUID import sempy_labs._icons as icons @@ -39,18 +40,11 @@ def create_eventhouse( or if no lakehouse attached, resolves to the workspace of the notebook. 
""" - (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) - - payload = {"displayName": name} - - if description: - payload["description"] = description - - if definition is not None: - if not isinstance(definition, dict): - raise ValueError(f"{icons.red_dot} The definition must be a dictionary.") + if definition is not None and not isinstance(definition, dict): + raise ValueError(f"{icons.red_dot} The definition must be a dictionary.") - payload["definition"] = { + definition_payload = ( + { "parts": [ { "path": "EventhouseProperties.json", @@ -59,19 +53,16 @@ def create_eventhouse( } ] } - - _base_api( - request=f"/v1/workspaces/{workspace_id}/eventhouses", - method="post", - status_codes=[201, 202], - payload=payload, - lro_return_status_code=True, + if definition is not None + else None ) - _print_success( - item_name=name, - item_type="eventhouse", - workspace_name=workspace_name, - action="created", + + create_item( + name=name, + type="Eventhouse", + workspace=workspace, + description=description, + definition=definition_payload, ) @@ -81,6 +72,8 @@ def list_eventhouses(workspace: Optional[str | UUID] = None) -> pd.DataFrame: This is a wrapper function for the following API: `Items - List Eventhouses `_. + Service Principal Authentication is supported (see `here `_ for examples). + Parameters ---------- workspace : str | uuid.UUID, default=None @@ -104,7 +97,9 @@ def list_eventhouses(workspace: Optional[str | UUID] = None) -> pd.DataFrame: (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) responses = _base_api( - request=f"/v1/workspaces/{workspace_id}/eventhouses", uses_pagination=True + request=f"/v1/workspaces/{workspace_id}/eventhouses", + uses_pagination=True, + client="fabric_sp", ) for r in responses: @@ -135,16 +130,7 @@ def delete_eventhouse(name: str, workspace: Optional[str | UUID] = None): or if no lakehouse attached, resolves to the workspace of the notebook. """ - (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) - item_id = resolve_item_id(item=name, type="Eventhouse", workspace=workspace) - - fabric.delete_item(item_id=item_id, workspace=workspace) - _print_success( - item_name=name, - item_type="eventhouse", - workspace_name=workspace_name, - action="deleted", - ) + delete_item(item=name, type="Eventhouse", workspace=workspace) def get_eventhouse_definition( @@ -174,21 +160,9 @@ def get_eventhouse_definition( The eventhouse definition in .json format or as a pandas dataframe. 
""" - (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) - item_id = resolve_item_id(item=eventhouse, type="Eventhouse", workspace=workspace) - - result = _base_api( - request=f"/v1/workspaces/{workspace_id}/eventhouses/{item_id}/getDefinition", - method="post", - status_codes=None, - lro_return_json=True, + return get_item_definition( + item=eventhouse, + type="Eventhouse", + workspace=workspace, + return_dataframe=return_dataframe, ) - - df = pd.json_normalize(result["definition"]["parts"]) - - if return_dataframe: - return df - else: - df_filt = df[df["path"] == "EventhouseProperties.json"] - payload = df_filt["payload"].iloc[0] - return _decode_b64(payload) diff --git a/src/sempy_labs/_eventstreams.py b/src/sempy_labs/_eventstreams.py index d93e0ab0..a770bf18 100644 --- a/src/sempy_labs/_eventstreams.py +++ b/src/sempy_labs/_eventstreams.py @@ -1,14 +1,14 @@ -import sempy.fabric as fabric import pandas as pd from typing import Optional from sempy_labs._helper_functions import ( resolve_workspace_name_and_id, _base_api, - _print_success, - resolve_item_id, + delete_item, _create_dataframe, + create_item, ) from uuid import UUID +import sempy_labs._icons as icons def list_eventstreams(workspace: Optional[str | UUID] = None) -> pd.DataFrame: @@ -74,29 +74,14 @@ def create_eventstream( or if no lakehouse attached, resolves to the workspace of the notebook. """ - (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) - - payload = {"displayName": name} - - if description: - payload["description"] = description - - _base_api( - request=f"/v1/workspaces/{workspace_id}/eventstreams", - method="post", - payload=payload, - status_codes=[201, 202], - lro_return_status_code=True, - ) - _print_success( - item_name=name, - item_type="eventstream", - workspace_name=workspace_name, - action="created", + create_item( + name=name, description=description, type="Eventstream", workspace=workspace ) -def delete_eventstream(name: str | UUID, workspace: Optional[str | UUID] = None): +def delete_eventstream( + eventstream: str | UUID, workspace: Optional[str | UUID] = None, **kwargs +): """ Deletes a Fabric eventstream. @@ -104,7 +89,7 @@ def delete_eventstream(name: str | UUID, workspace: Optional[str | UUID] = None) Parameters ---------- - name: str | uuid.UUID + eventstream: str | uuid.UUID Name or ID of the eventstream. workspace : str | uuid.UUID, default=None The Fabric workspace name or ID. @@ -112,13 +97,10 @@ def delete_eventstream(name: str | UUID, workspace: Optional[str | UUID] = None) or if no lakehouse attached, resolves to the workspace of the notebook. """ - (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) - item_id = resolve_item_id(item=name, type="Eventstream", workspace=workspace) - - fabric.delete_item(item_id=item_id, workspace=workspace) - _print_success( - item_name=name, - item_type="eventstream", - workspace_name=workspace_name, - action="deleted", - ) + if "name" in kwargs: + eventstream = kwargs["name"] + print( + f"{icons.warning} The 'name' parameter is deprecated. Please use 'eventstream' instead." 
+ ) + + delete_item(item=eventstream, type="Eventstream", workspace=workspace) diff --git a/src/sempy_labs/_external_data_shares.py b/src/sempy_labs/_external_data_shares.py index 21e2b65b..273ed7dd 100644 --- a/src/sempy_labs/_external_data_shares.py +++ b/src/sempy_labs/_external_data_shares.py @@ -1,4 +1,3 @@ -import sempy.fabric as fabric from uuid import UUID import pandas as pd from typing import Optional, List @@ -7,6 +6,7 @@ resolve_workspace_name_and_id, _base_api, _create_dataframe, + resolve_item_id, ) @@ -39,9 +39,7 @@ def create_external_data_share( """ (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) - item_id = fabric.resolve_item_id( - item_name=item_name, type=item_type, workspace=workspace_id - ) + item_id = resolve_item_id(item=item_name, type=item_type, workspace=workspace_id) if isinstance(paths, str): paths = [paths] @@ -85,9 +83,7 @@ def revoke_external_data_share( """ (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) - item_id = fabric.resolve_item_id( - item_name=item_name, type=item_type, workspace=workspace_id - ) + item_id = resolve_item_id(item=item_name, type=item_type, workspace=workspace_id) _base_api( request=f"/v1/workspaces/{workspace_id}/items/{item_id}/externalDataShares/{external_data_share_id}/revoke", @@ -124,9 +120,7 @@ def list_external_data_shares_in_item( """ (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) - item_id = fabric.resolve_item_id( - item_name=item_name, type=item_type, workspace=workspace_id - ) + item_id = resolve_item_id(item=item_name, type=item_type, workspace=workspace_id) columns = { "External Data Share Id": "string", diff --git a/src/sempy_labs/_gateways.py b/src/sempy_labs/_gateways.py index 93f489b3..90066fb7 100644 --- a/src/sempy_labs/_gateways.py +++ b/src/sempy_labs/_gateways.py @@ -314,7 +314,7 @@ def create_vnet_gateway( The name of the subnet. 
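For the delete_eventstream change above: the kwargs shim keeps old callers working, so both of the calls below delete the same item, with the second one printing the deprecation warning (names are placeholders):

delete_eventstream(eventstream="Clickstream", workspace="Streaming")   # new parameter name
delete_eventstream(name="Clickstream", workspace="Streaming")          # deprecated, still accepted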
""" - capacity_id = resolve_capacity_id(capacity) + capacity_id = resolve_capacity_id(capacity=capacity) payload = { "type": "VirtualNetwork", "displayName": name, @@ -343,7 +343,7 @@ def create_vnet_gateway( def update_on_premises_gateway( - gateway: str, + gateway: str | UUID, allow_cloud_connection_refresh: Optional[bool] = None, allow_custom_connectors: Optional[bool] = None, load_balancing_setting: Optional[str] = None, @@ -396,7 +396,7 @@ def update_on_premises_gateway( def update_vnet_gateway( - gateway: str, + gateway: str | UUID, capacity: str | UUID, inactivity_minutes_before_sleep: Optional[int] = None, number_of_member_gateways: Optional[int] = None, @@ -425,7 +425,7 @@ def update_vnet_gateway( payload = {} if capacity is not None: - capacity_id = resolve_capacity_id(capacity) + capacity_id = resolve_capacity_id(capacity=capacity) payload["capacityId"] = capacity_id if inactivity_minutes_before_sleep is not None: payload["inactivityMinutesBeforeSleep"] = inactivity_minutes_before_sleep diff --git a/src/sempy_labs/_generate_semantic_model.py b/src/sempy_labs/_generate_semantic_model.py index 1978f724..bf4b1c17 100644 --- a/src/sempy_labs/_generate_semantic_model.py +++ b/src/sempy_labs/_generate_semantic_model.py @@ -5,12 +5,13 @@ from typing import Optional, List from sempy._utils._log import log from sempy_labs._helper_functions import ( - resolve_lakehouse_name, resolve_workspace_name_and_id, resolve_dataset_name_and_id, _conv_b64, _decode_b64, _base_api, + _mount, + resolve_workspace_id, ) from sempy_labs.lakehouse._lakehouse import lakehouse_attached import sempy_labs._icons as icons @@ -252,6 +253,7 @@ def deploy_semantic_model( target_workspace: Optional[str | UUID] = None, refresh_target_dataset: bool = True, overwrite: bool = False, + perspective: Optional[str] = None, ): """ Deploys a semantic model based on an existing semantic model. @@ -274,6 +276,8 @@ def deploy_semantic_model( If set to True, this will initiate a full refresh of the target semantic model in the target workspace. overwrite : bool, default=False If set to True, overwrites the existing semantic model in the workspace if it exists. + perspective : str, default=None + Set this to the name of a perspective in the model and it will reduce the deployed model down to the tables/columns/measures/hierarchies within that perspective. """ (source_workspace_name, source_workspace_id) = resolve_workspace_name_and_id( @@ -282,7 +286,7 @@ def deploy_semantic_model( if target_workspace is None: target_workspace_name = source_workspace_name - target_workspace_id = fabric.resolve_workspace_id(target_workspace_name) + target_workspace_id = resolve_workspace_id(workspace=target_workspace_name) else: (target_workspace_name, target_workspace_id) = resolve_workspace_name_and_id( target_workspace @@ -307,7 +311,21 @@ def deploy_semantic_model( f"{icons.warning} The '{target_dataset}' semantic model already exists within the '{target_workspace_name}' workspace. The 'overwrite' parameter is set to False so the source semantic model was not deployed to the target destination." 
) - bim = get_semantic_model_bim(dataset=source_dataset, workspace=source_workspace_id) + if perspective is not None: + + from sempy_labs.tom import connect_semantic_model + + with connect_semantic_model( + dataset=source_dataset, workspace=source_workspace, readonly=True + ) as tom: + + df_added = tom._reduce_model(perspective_name=perspective) + bim = tom.get_bim() + + else: + bim = get_semantic_model_bim( + dataset=source_dataset, workspace=source_workspace_id + ) # Create the semantic model if the model does not exist if dfD_filt.empty: @@ -325,6 +343,9 @@ def deploy_semantic_model( if refresh_target_dataset: refresh_semantic_model(dataset=target_dataset, workspace=target_workspace_id) + if perspective is not None: + return df_added + @log def get_semantic_model_bim( @@ -368,16 +389,16 @@ def get_semantic_model_bim( f"{icons.red_dot} In order to save the model.bim file, a lakehouse must be attached to the notebook. Please attach a lakehouse to this notebook." ) - lakehouse = resolve_lakehouse_name() - folderPath = "/lakehouse/default/Files" - fileExt = ".bim" - if not save_to_file_name.endswith(fileExt): - save_to_file_name = f"{save_to_file_name}{fileExt}" - filePath = os.path.join(folderPath, save_to_file_name) - with open(filePath, "w") as json_file: + local_path = _mount() + save_folder = f"{local_path}/Files" + file_ext = ".bim" + if not save_to_file_name.endswith(file_ext): + save_to_file_name = f"{save_to_file_name}{file_ext}" + file_path = os.path.join(save_folder, save_to_file_name) + with open(file_path, "w") as json_file: json.dump(bimJson, json_file, indent=4) print( - f"{icons.green_dot} The {fileExt} file for the '{dataset_name}' semantic model has been saved to the '{lakehouse}' in this location: '{filePath}'.\n\n" + f"{icons.green_dot} The {file_ext} file for the '{dataset_name}' semantic model has been saved to the lakehouse attached to the notebook within: 'Files/{save_to_file_name}'.\n\n" ) return bimJson @@ -472,23 +493,20 @@ def get_semantic_model_size( Returns ------- int - The size of the semantic model in + The size of the semantic model in bytes """ - (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) - (dataset_name, dataset_id) = resolve_dataset_name_and_id(dataset, workspace_id) - dict = fabric.evaluate_dax( - dataset=dataset_id, - workspace=workspace_id, + dataset=dataset, + workspace=workspace, dax_string=""" EVALUATE SELECTCOLUMNS(FILTER(INFO.STORAGETABLECOLUMNS(), [COLUMN_TYPE] = "BASIC_DATA"),[DICTIONARY_SIZE]) """, ) used_size = fabric.evaluate_dax( - dataset=dataset_id, - workspace=workspace_id, + dataset=dataset, + workspace=workspace, dax_string=""" EVALUATE SELECTCOLUMNS(INFO.STORAGETABLECOLUMNSEGMENTS(),[USED_SIZE]) """, @@ -503,5 +521,7 @@ def get_semantic_model_size( result = model_size / (1024**2) * 10**6 elif model_size >= 10**3: result = model_size / (1024) * 10**3 + else: + result = model_size return result diff --git a/src/sempy_labs/_git.py b/src/sempy_labs/_git.py index 209ff281..fb3b2fa4 100644 --- a/src/sempy_labs/_git.py +++ b/src/sempy_labs/_git.py @@ -4,6 +4,7 @@ from sempy_labs._helper_functions import ( resolve_workspace_name_and_id, _base_api, + _create_dataframe, ) from uuid import UUID @@ -126,7 +127,7 @@ def connect_workspace_to_github( def disconnect_workspace_from_git(workspace: Optional[str | UUID] = None): """ - Disconnects a workpsace from a git repository. + Disconnects a workspace from a git repository. This is a wrapper function for the following API: `Git - Disconnect `_. 
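Tying the deploy_semantic_model changes above together: when perspective is set, the source model is reduced to that perspective via the TOM wrapper before the .bim is deployed, and the dataframe of retained objects is returned. A hedged usage sketch (dataset, perspective and workspace names are placeholders; the source/target parameter names follow the references in the body above):

df_kept = deploy_semantic_model(
    source_dataset="Sales Model",
    source_workspace="Dev Workspace",
    target_dataset="Sales Model (Field Sales)",
    target_workspace="Prod Workspace",
    perspective="Field Sales",   # reduce to this perspective before deploying
    refresh_target_dataset=True,
    overwrite=True,
)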
@@ -432,3 +433,91 @@ def update_from_git( print( f"{icons.green_dot} The '{workspace_name}' workspace has been updated with commits pushed to the connected branch." ) + + +def get_my_git_credentials( + workspace: Optional[str | UUID] = None, +) -> pd.DataFrame: + """ + Returns the user's Git credentials configuration details. + + This is a wrapper function for the following API: `Git - Get My Git Credentials `_. + + Parameters + ---------- + workspace : str | uuid.UUID, default=None + The workspace name or ID. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + + Returns + ------- + pandas.DataFrame + A pandas dataframe showing the user's Git credentials configuration details. + """ + + (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) + + columns = { + "Source": "string", + } + + df = _create_dataframe(columns) + + response = _base_api(request=f"/v1/workspaces/{workspace_id}/git/myGitCredentials") + + r = response.json() + new_data = { + "Source": r.get("source"), + "Connection Id": r.get("connectionId"), + } + df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + + return df + + +def update_my_git_credentials( + source: str, + connection_id: Optional[UUID] = None, + workspace: Optional[str | UUID] = None, +): + """ + Updates the user's Git credentials configuration details. + + This is a wrapper function for the following API: `Git - Update My Git Credentials `_. + + Parameters + ---------- + source : str + The Git credentials source. Valid options: 'Automatic', 'ConfiguredConnection', 'None'. + connection_id : UUID, default=None + The object ID of the connection. Valid only for the 'ConfiguredConnection' source. + workspace : str | uuid.UUID, default=None + The workspace name or ID. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + """ + + (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) + + if source == "ConfiguredConnection" and connection_id is None: + raise ValueError( + f"{icons.red_dot} The 'ConfiguredConnection' source requires a connection_id." + ) + + payload = { + "source": source, + } + + if connection_id is not None: + payload["connectionId"] = connection_id + + _base_api( + request=f"/v1/workspaces/{workspace_id}/git/myGitCredentials", + method="patch", + payload=payload, + ) + + print( + f"{icons.green_dot} The user's Git credentials have been updated accordingly." + ) diff --git a/src/sempy_labs/_graphQL.py b/src/sempy_labs/_graphQL.py index 679122e2..11c5363e 100644 --- a/src/sempy_labs/_graphQL.py +++ b/src/sempy_labs/_graphQL.py @@ -5,7 +5,7 @@ _base_api, _create_dataframe, resolve_workspace_name_and_id, - _print_success, + create_item, ) @@ -15,6 +15,8 @@ def list_graphql_apis(workspace: Optional[str | UUID]) -> pd.DataFrame: This is a wrapper function for the following API: `Items - List GraphQLApis `_. + Service Principal Authentication is supported (see `here `_ for examples). 
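Before moving on, a brief sketch of the Git credential helpers added above; the connection ID and workspace name are placeholders, and the import path follows the module the functions are defined in.

from sempy_labs._git import get_my_git_credentials, update_my_git_credentials

# Inspect the current Git credentials configuration for the workspace.
print(get_my_git_credentials(workspace="Dev Workspace"))

# Switch to a configured connection; 'ConfiguredConnection' requires a connection_id.
update_my_git_credentials(
    source="ConfiguredConnection",
    connection_id="00000000-0000-0000-0000-000000000000",
    workspace="Dev Workspace",
)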
+ Parameters ---------- workspace : str | uuid.UUID, default=None @@ -38,7 +40,9 @@ def list_graphql_apis(workspace: Optional[str | UUID]) -> pd.DataFrame: (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) responses = _base_api( - request=f"/v1/workspaces/{workspace_id}/GraphQLApis", uses_pagination=True + request=f"/v1/workspaces/{workspace_id}/GraphQLApis", + uses_pagination=True, + client="fabric_sp", ) for r in responses: @@ -73,23 +77,6 @@ def create_graphql_api( or if no lakehouse attached, resolves to the workspace of the notebook. """ - (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) - - payload = {"displayName": name} - - if description: - payload["description"] = description - - _base_api( - request=f"/v1/workspaces/{workspace_id}/GraphQLApis", - method="post", - status_codes=[201, 202], - payload=payload, - lro_return_status_code=True, - ) - _print_success( - item_name=name, - item_type="GraphQL API", - workspace_name=workspace_name, - action="created", + create_item( + name=name, description=description, type="GraphQLApi", workspace=workspace ) diff --git a/src/sempy_labs/_helper_functions.py b/src/sempy_labs/_helper_functions.py index 8a0a64be..6a88dd30 100644 --- a/src/sempy_labs/_helper_functions.py +++ b/src/sempy_labs/_helper_functions.py @@ -4,11 +4,11 @@ import base64 import time import uuid -from sempy.fabric.exceptions import FabricHTTPException +from sempy.fabric.exceptions import FabricHTTPException, WorkspaceNotFoundException import pandas as pd from functools import wraps import datetime -from typing import Optional, Tuple, List +from typing import Optional, Tuple, List, Dict from uuid import UUID import sempy_labs._icons as icons from azure.core.credentials import TokenCredential, AccessToken @@ -17,6 +17,8 @@ from IPython.display import display, HTML import requests import sempy_labs._authentication as auth +from jsonpath_ng.ext import parse +from jsonpath_ng.jsonpath import Fields, Index def _build_url(url: str, params: dict) -> str: @@ -30,10 +32,16 @@ def _build_url(url: str, params: dict) -> str: return url +def _encode_user(user: str) -> str: + + return urllib.parse.quote(user, safe="@") + + def create_abfss_path( lakehouse_id: UUID, lakehouse_workspace_id: UUID, delta_table_name: Optional[str] = None, + schema: Optional[str] = None, ) -> str: """ Creates an abfss path for a delta table in a Fabric lakehouse. @@ -46,6 +54,8 @@ def create_abfss_path( ID of the Fabric workspace. delta_table_name : str, default=None Name of the delta table name. + schema : str, default=None + The schema of the delta table. 
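As an aside, the _encode_user helper added above simply percent-encodes a user principal name while leaving '@' intact; a minimal standalone illustration of the same call:

import urllib.parse

def encode_user(user: str) -> str:
    # Same logic as _encode_user above: quote everything except '@'.
    return urllib.parse.quote(user, safe="@")

print(encode_user("first last@contoso.com"))  # -> first%20last@contoso.com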
Returns ------- @@ -57,11 +67,24 @@ def create_abfss_path( path = f"abfss://{lakehouse_workspace_id}@{fp}/{lakehouse_id}" if delta_table_name is not None: - path += f"/Tables/{delta_table_name}" + path += "/Tables" + if schema is not None: + path += f"/{schema}/{delta_table_name}" + else: + path += f"/{delta_table_name}" return path +def create_abfss_path_from_path( + lakehouse_id: UUID, workspace_id: UUID, file_path: str +) -> str: + + fp = _get_default_file_path() + + return f"abfss://{workspace_id}@{fp}/{lakehouse_id}/{file_path}" + + def _get_default_file_path() -> str: default_file_storage = _get_fabric_context_setting(name="fs.defaultFS") @@ -130,14 +153,16 @@ def create_relationship_name( ) -def resolve_report_id(report: str, workspace: Optional[str | UUID] = None) -> UUID: +def resolve_report_id( + report: str | UUID, workspace: Optional[str | UUID] = None +) -> UUID: """ Obtains the ID of the Power BI report. Parameters ---------- - report : str - The name of the Power BI report. + report : str | uuid.UUID + The name or ID of the Power BI report. workspace : str | uuid.UUID, default=None The Fabric workspace name or ID. Defaults to None which resolves to the workspace of the attached lakehouse @@ -145,11 +170,11 @@ def resolve_report_id(report: str, workspace: Optional[str | UUID] = None) -> UU Returns ------- - UUID + uuid.UUID The ID of the Power BI report. """ - return fabric.resolve_item_id(item_name=report, type="Report", workspace=workspace) + return resolve_item_id(item=report, type="Report", workspace=workspace) def resolve_report_name(report_id: UUID, workspace: Optional[str | UUID] = None) -> str: @@ -171,66 +196,149 @@ def resolve_report_name(report_id: UUID, workspace: Optional[str | UUID] = None) The name of the Power BI report. """ - return fabric.resolve_item_name( - item_id=report_id, type="Report", workspace=workspace - ) + return resolve_item_name(item_id=report_id, workspace=workspace) -def resolve_item_id( - item: str | UUID, type: str, workspace: Optional[str] = None -) -> UUID: +def delete_item( + item: str | UUID, type: str, workspace: Optional[str | UUID] = None +) -> None: + """ + Deletes an item from a Fabric workspace. - if _is_valid_uuid(item): - return item - else: - return fabric.resolve_item_id(item_name=item, type=type, workspace=workspace) + Parameters + ---------- + item : str | uuid.UUID + The name or ID of the item to be deleted. + type : str + The type of the item to be deleted. + workspace : str | uuid.UUID, default=None + The Fabric workspace name or ID. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + """ + from sempy_labs._utils import item_types -def resolve_item_name_and_id( - item: str | UUID, type: Optional[str] = None, workspace: Optional[str | UUID] = None -) -> Tuple[str, UUID]: + (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) + (item_name, item_id) = resolve_item_name_and_id(item, type, workspace_id) + item_type = item_types.get(type)[0].lower() + + fabric.delete_item(item_id=item_id, workspace=workspace_id) + + print( + f"{icons.green_dot} The '{item_name}' {item_type} has been successfully deleted from the '{workspace_name}' workspace." + ) + + +def create_item( + name: str, + type: str, + description: Optional[str] = None, + definition: Optional[dict] = None, + workspace: Optional[str | UUID] = None, +): + """ + Creates an item in a Fabric workspace. 
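For reference, the schema-aware create_abfss_path shown earlier in this hunk yields paths of the following shape; the GUIDs are placeholders and the OneLake host is resolved from the notebook's Fabric context at runtime.

from sempy_labs._helper_functions import create_abfss_path

ws_id = "11111111-1111-1111-1111-111111111111"  # placeholder workspace ID
lh_id = "22222222-2222-2222-2222-222222222222"  # placeholder lakehouse ID

# abfss://<workspace id>@<onelake host>/<lakehouse id>/Tables/DimDate
no_schema = create_abfss_path(lh_id, ws_id, "DimDate")

# abfss://<workspace id>@<onelake host>/<lakehouse id>/Tables/dbo/DimDate
with_schema = create_abfss_path(lh_id, ws_id, "DimDate", schema="dbo")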
+ + Parameters + ---------- + name : str + The name of the item to be created. + type : str + The type of the item to be created. + description : str, default=None + A description of the item to be created. + definition : dict, default=None + The definition of the item to be created. + workspace : str | uuid.UUID, default=None + The Fabric workspace name or ID. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + """ + from sempy_labs._utils import item_types (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) + item_type = item_types.get(type)[0].lower() + item_type_url = item_types.get(type)[1] - if _is_valid_uuid(item): - item_id = item - item_name = fabric.resolve_item_name( - item_id=item_id, type=type, workspace=workspace_id - ) - else: - if type is None: - raise ValueError( - f"{icons.warning} Must specify a 'type' if specifying a name as the 'item'." - ) - item_name = item - item_id = fabric.resolve_item_id( - item_name=item, type=type, workspace=workspace_id - ) + payload = { + "displayName": name, + } + if description: + payload["description"] = description + if definition: + payload["definition"] = definition + + _base_api( + request=f"/v1/workspaces/{workspace_id}/{item_type_url}", + method="post", + payload=payload, + status_codes=[201, 202], + lro_return_status_code=True, + ) + print( + f"{icons.green_dot} The '{name}' {item_type} has been successfully created within the '{workspace_name}' workspace." + ) - return item_name, item_id + +def get_item_definition( + item: str | UUID, + type: str, + workspace: Optional[str | UUID] = None, + format: Optional[str] = None, + return_dataframe: bool = True, + decode: bool = True, +): + from sempy_labs._utils import item_types + + workspace_id = resolve_workspace_id(workspace) + item_id = resolve_item_id(item, type, workspace_id) + item_type_url = item_types.get(type)[1] + path = item_types.get(type)[2] + + url = f"/v1/workspaces/{workspace_id}/{item_type_url}/{item_id}/getDefinition" + if format: + url += f"?format={format}" + + result = _base_api( + request=url, + method="post", + status_codes=None, + lro_return_json=True, + ) + + if return_dataframe: + return pd.json_normalize(result["definition"]["parts"]) + + value = next( + p.get("payload") for p in result["definition"]["parts"] if p.get("path") == path + ) + if decode: + return json.loads(_decode_b64(value)) + else: + return value def resolve_lakehouse_name_and_id( lakehouse: Optional[str | UUID] = None, workspace: Optional[str | UUID] = None ) -> Tuple[str, UUID]: - (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) + workspace_id = resolve_workspace_id(workspace) type = "Lakehouse" if lakehouse is None: - lakehouse_id = fabric.get_lakehouse_id() - lakehouse_name = fabric.resolve_item_name( - item_id=lakehouse_id, type=type, workspace=workspace_id - ) - elif _is_valid_uuid(lakehouse): - lakehouse_id = lakehouse - lakehouse_name = fabric.resolve_item_name( - item_id=lakehouse_id, type=type, workspace=workspace_id + lakehouse_id = _get_fabric_context_setting(name="trident.lakehouse.id") + if lakehouse_id == "": + raise ValueError( + f"{icons.red_dot} Cannot resolve a lakehouse. Please enter a valid lakehouse or make sure a lakehouse is attached to the notebook." 
+ ) + (lakehouse_name, lakehouse_id) = resolve_item_name_and_id( + item=lakehouse_id, type=type, workspace=workspace_id ) + else: - lakehouse_name = lakehouse - lakehouse_id = fabric.resolve_item_id( - item_name=lakehouse, type=type, workspace=workspace_id + (lakehouse_name, lakehouse_id) = resolve_item_name_and_id( + item=lakehouse, type=type, workspace=workspace_id ) return lakehouse_name, lakehouse_id @@ -268,14 +376,7 @@ def resolve_dataset_id( The ID of the semantic model. """ - if _is_valid_uuid(dataset): - dataset_id = dataset - else: - dataset_id = fabric.resolve_item_id( - item_name=dataset, type="SemanticModel", workspace=workspace - ) - - return dataset_id + return resolve_item_id(item=dataset, type="SemanticModel", workspace=workspace) def resolve_dataset_name( @@ -299,9 +400,7 @@ def resolve_dataset_name( The name of the semantic model. """ - return fabric.resolve_item_name( - item_id=dataset_id, type="SemanticModel", workspace=workspace - ) + return resolve_item_name(item_id=dataset_id, workspace=workspace) def resolve_lakehouse_name( @@ -327,11 +426,13 @@ def resolve_lakehouse_name( """ if lakehouse_id is None: - lakehouse_id = fabric.get_lakehouse_id() + lakehouse_id = _get_fabric_context_setting(name="trident.lakehouse.id") + if lakehouse_id == "": + raise ValueError( + f"{icons.red_dot} Cannot resolve a lakehouse. Please enter a valid lakehouse or make sure a lakehouse is attached to the notebook." + ) - return fabric.resolve_item_name( - item_id=lakehouse_id, type="Lakehouse", workspace=workspace - ) + return resolve_item_name(item_id=lakehouse_id, workspace=workspace) def resolve_lakehouse_id( @@ -356,12 +457,14 @@ def resolve_lakehouse_id( """ if lakehouse is None: - lakehouse_id = fabric.get_lakehouse_id() - elif _is_valid_uuid(lakehouse): - lakehouse_id = lakehouse + lakehouse_id = _get_fabric_context_setting(name="trident.lakehouse.id") + if lakehouse_id == "": + raise ValueError( + f"{icons.red_dot} Cannot resolve a lakehouse. Please enter a valid lakehouse or make sure a lakehouse is attached to the notebook." + ) else: - lakehouse_id = fabric.resolve_item_id( - item_name=lakehouse, type="Lakehouse", workspace=workspace + lakehouse_id = resolve_item_id( + item=lakehouse, type="Lakehouse", workspace=workspace ) return lakehouse_id @@ -489,11 +592,13 @@ def save_as_delta_table( workspace: Optional[str | UUID] = None, ): """ - Saves a pandas dataframe as a delta table in a Fabric lakehouse. + Saves a pandas or spark dataframe as a delta table in a Fabric lakehouse. + + This function may be executed in either a PySpark or pure Python notebook. If executing in a pure Python notebook, the dataframe must be a pandas dataframe. Parameters ---------- - dataframe : pandas.DataFrame + dataframe : pandas.DataFrame | spark.Dataframe The dataframe to be saved as a delta table. delta_table_name : str The name of the delta table. @@ -512,19 +617,6 @@ def save_as_delta_table( or if no lakehouse attached, resolves to the workspace of the notebook. """ - from pyspark.sql.types import ( - StringType, - IntegerType, - FloatType, - DateType, - StructType, - StructField, - BooleanType, - LongType, - DoubleType, - TimestampType, - ) - (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) (lakehouse_name, lakehouse_id) = resolve_lakehouse_name_and_id( lakehouse=lakehouse, workspace=workspace_id @@ -543,50 +635,101 @@ def save_as_delta_table( f"{icons.red_dot} Invalid 'delta_table_name'. Delta tables in the lakehouse cannot have spaces in their names." 
) - dataframe.columns = [col.replace(" ", "_") for col in dataframe.columns] - - spark = _create_spark_session() + import pyarrow as pa + from pyspark.sql.types import ( + StringType, + IntegerType, + FloatType, + DateType, + StructType, + StructField, + BooleanType, + LongType, + DoubleType, + TimestampType, + ) - type_mapping = { - "string": StringType(), - "str": StringType(), - "integer": IntegerType(), - "int": IntegerType(), - "float": FloatType(), - "date": DateType(), - "bool": BooleanType(), - "boolean": BooleanType(), - "long": LongType(), - "double": DoubleType(), - "timestamp": TimestampType(), - } + def get_type_mapping(pure_python): + common_mapping = { + "string": ("pa", pa.string(), StringType()), + "str": ("pa", pa.string(), StringType()), + "integer": ("pa", pa.int32(), IntegerType()), + "int": ("pa", pa.int32(), IntegerType()), + "float": ("pa", pa.float32(), FloatType()), + "double": ("pa", pa.float64(), DoubleType()), + "long": ("pa", pa.int64(), LongType()), + "bool": ("pa", pa.bool_(), BooleanType()), + "boolean": ("pa", pa.bool_(), BooleanType()), + "date": ("pa", pa.date32(), DateType()), + "timestamp": ("pa", pa.timestamp("us"), TimestampType()), + } + return {k: v[1] if pure_python else v[2] for k, v in common_mapping.items()} - if isinstance(dataframe, pd.DataFrame): - if schema is None: - spark_df = spark.createDataFrame(dataframe) + def build_schema(schema_dict, type_mapping, use_arrow=True): + if use_arrow: + fields = [ + pa.field(name, type_mapping.get(dtype.lower())) + for name, dtype in schema_dict.items() + ] + return pa.schema(fields) else: - schema_map = StructType( + return StructType( [ - StructField(column_name, type_mapping[data_type], True) - for column_name, data_type in schema.items() + StructField(name, type_mapping.get(dtype.lower()), True) + for name, dtype in schema_dict.items() ] ) - spark_df = spark.createDataFrame(dataframe, schema_map) + + # Main logic + schema_map = None + if schema is not None: + use_arrow = _pure_python_notebook() + type_mapping = get_type_mapping(use_arrow) + schema_map = build_schema(schema, type_mapping, use_arrow) + + if isinstance(dataframe, pd.DataFrame): + dataframe.columns = [col.replace(" ", "_") for col in dataframe.columns] + if _pure_python_notebook(): + spark_df = dataframe + else: + spark = _create_spark_session() + if schema is None: + spark_df = spark.createDataFrame(dataframe) + else: + spark_df = spark.createDataFrame(dataframe, schema_map) else: + for col_name in dataframe.columns: + new_name = col_name.replace(" ", "_") + dataframe = dataframe.withColumnRenamed(col_name, new_name) spark_df = dataframe - filePath = create_abfss_path( + file_path = create_abfss_path( lakehouse_id=lakehouse_id, lakehouse_workspace_id=workspace_id, delta_table_name=delta_table_name, ) - if merge_schema: - spark_df.write.mode(write_mode).format("delta").option( - "mergeSchema", "true" - ).save(filePath) + if _pure_python_notebook(): + from deltalake import write_deltalake + + write_args = { + "table_or_uri": file_path, + "data": spark_df, + "mode": write_mode, + "schema": schema_map, + } + + if merge_schema: + write_args["schema_mode"] = "merge" + + write_deltalake(**write_args) else: - spark_df.write.mode(write_mode).format("delta").save(filePath) + writer = spark_df.write.mode(write_mode).format("delta") + if merge_schema: + writer = writer.option("mergeSchema", "true") + + writer.save(file_path) + print( f"{icons.green_dot} The dataframe has been saved as the '{delta_table_name}' table in the '{lakehouse_name}' 
lakehouse within the '{workspace_name}' workspace." ) @@ -628,6 +771,55 @@ def language_validate(language: str): return lang +def resolve_workspace_id( + workspace: Optional[str | UUID] = None, +) -> UUID: + if workspace is None: + workspace_id = _get_fabric_context_setting(name="trident.workspace.id") + elif _is_valid_uuid(workspace): + # Check (optional) + workspace_id = workspace + try: + _base_api(request=f"/v1/workspaces/{workspace_id}", client="fabric_sp") + except FabricHTTPException: + raise ValueError( + f"{icons.red_dot} The '{workspace_id}' workspace was not found." + ) + else: + responses = _base_api( + request="/v1/workspaces", client="fabric_sp", uses_pagination=True + ) + workspace_id = None + for r in responses: + for v in r.get("value", []): + display_name = v.get("displayName") + if display_name == workspace: + workspace_id = v.get("id") + break + + if workspace_id is None: + raise WorkspaceNotFoundException(workspace) + + return workspace_id + + +def resolve_workspace_name(workspace_id: Optional[UUID] = None) -> str: + + if workspace_id is None: + workspace_id = _get_fabric_context_setting(name="trident.workspace.id") + + try: + response = _base_api( + request=f"/v1/workspaces/{workspace_id}", client="fabric_sp" + ).json() + except FabricHTTPException: + raise ValueError( + f"{icons.red_dot} The '{workspace_id}' workspace was not found." + ) + + return response.get("displayName") + + def resolve_workspace_name_and_id( workspace: Optional[str | UUID] = None, ) -> Tuple[str, str]: @@ -643,21 +835,115 @@ def resolve_workspace_name_and_id( Returns ------- - str, str + str, uuid.UUID The name and ID of the Fabric workspace. """ if workspace is None: - workspace_id = fabric.get_workspace_id() - workspace_name = fabric.resolve_workspace_name(workspace_id) + workspace_id = _get_fabric_context_setting(name="trident.workspace.id") + workspace_name = resolve_workspace_name(workspace_id) elif _is_valid_uuid(workspace): workspace_id = workspace - workspace_name = fabric.resolve_workspace_name(workspace_id) + workspace_name = resolve_workspace_name(workspace_id) + else: + responses = _base_api( + request="/v1/workspaces", client="fabric_sp", uses_pagination=True + ) + workspace_id = None + workspace_name = None + for r in responses: + for v in r.get("value", []): + display_name = v.get("displayName") + if display_name == workspace: + workspace_name = workspace + workspace_id = v.get("id") + break + + if workspace_name is None or workspace_id is None: + raise WorkspaceNotFoundException(workspace) + + return workspace_name, workspace_id + + +def resolve_item_id( + item: str | UUID, type: Optional[str] = None, workspace: Optional[str | UUID] = None +) -> UUID: + + (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) + item_id = None + + if _is_valid_uuid(item): + # Check (optional) + item_id = item + try: + _base_api( + request=f"/v1/workspaces/{workspace_id}/items/{item_id}", + client="fabric_sp", + ) + except FabricHTTPException: + raise ValueError( + f"{icons.red_dot} The '{item_id}' item was not found in the '{workspace_name}' workspace." + ) else: - workspace_name = workspace - workspace_id = fabric.resolve_workspace_id(workspace_name) + if type is None: + raise ValueError( + f"{icons.red_dot} The 'type' parameter is required if specifying an item name." 
+ ) + responses = _base_api( + request=f"/v1/workspaces/{workspace_id}/items?type={type}", + client="fabric_sp", + uses_pagination=True, + ) + for r in responses: + for v in r.get("value", []): + display_name = v.get("displayName") + if display_name == item: + item_id = v.get("id") + break + + if item_id is None: + raise ValueError( + f"{icons.red_dot} There's no item '{item}' of type '{type}' in the '{workspace_name}' workspace." + ) + + return item_id + + +def resolve_item_name_and_id( + item: str | UUID, type: Optional[str] = None, workspace: Optional[str | UUID] = None +) -> Tuple[str, UUID]: + + workspace_id = resolve_workspace_id(workspace) + item_id = resolve_item_id(item=item, type=type, workspace=workspace_id) + item_name = ( + _base_api( + request=f"/v1/workspaces/{workspace_id}/items/{item_id}", client="fabric_sp" + ) + .json() + .get("displayName") + ) + + return item_name, item_id - return str(workspace_name), str(workspace_id) + +def resolve_item_name(item_id: UUID, workspace: Optional[str | UUID] = None) -> str: + + workspace_id = resolve_workspace_id(workspace) + try: + item_name = ( + _base_api( + request=f"/v1/workspaces/{workspace_id}/items/{item_id}", + client="fabric_sp", + ) + .json() + .get("displayName") + ) + except FabricHTTPException: + raise ValueError( + f"{icons.red_dot} The '{item_id}' item was not found in the '{workspace_id}' workspace." + ) + + return item_name def _extract_json(dataframe: pd.DataFrame) -> dict: @@ -770,7 +1056,7 @@ def resolve_dataset_from_report( dfR = _get_report(report=report, workspace=workspace) dataset_id = dfR["Dataset Id"].iloc[0] dataset_workspace_id = dfR["Dataset Workspace Id"].iloc[0] - dataset_workspace = fabric.resolve_workspace_name(dataset_workspace_id) + dataset_workspace = resolve_workspace_name(workspace_id=dataset_workspace_id) dataset_name = resolve_dataset_name( dataset_id=dataset_id, workspace=dataset_workspace ) @@ -803,12 +1089,13 @@ def resolve_workspace_capacity( Tuple[uuid.UUID, str] capacity Id; capacity came. """ + from sempy_labs._capacities import list_capacities (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) filter_condition = urllib.parse.quote(workspace_id) dfW = fabric.list_workspaces(filter=f"id eq '{filter_condition}'") capacity_id = dfW["Capacity Id"].iloc[0] - dfC = fabric.list_capacities() + dfC = list_capacities() dfC_filt = dfC[dfC["Id"] == capacity_id] if len(dfC_filt) == 1: capacity_name = dfC_filt["Display Name"].iloc[0] @@ -866,8 +1153,10 @@ def get_capacity_name(workspace: Optional[str | UUID] = None) -> str: The capacity name. """ + from sempy_labs._capacities import list_capacities + capacity_id = get_capacity_id(workspace) - dfC = fabric.list_capacities() + dfC = list_capacities() dfC_filt = dfC[dfC["Id"] == capacity_id] if dfC_filt.empty: raise ValueError( @@ -893,11 +1182,12 @@ def resolve_capacity_name(capacity_id: Optional[UUID] = None) -> str: str The capacity name. """ + from sempy_labs._capacities import list_capacities if capacity_id is None: return get_capacity_name() - dfC = fabric.list_capacities() + dfC = list_capacities() dfC_filt = dfC[dfC["Id"] == capacity_id] if dfC_filt.empty: @@ -908,14 +1198,14 @@ def resolve_capacity_name(capacity_id: Optional[UUID] = None) -> str: return dfC_filt["Display Name"].iloc[0] -def resolve_capacity_id(capacity_name: Optional[str] = None) -> UUID: +def resolve_capacity_id(capacity: Optional[str | UUID] = None, **kwargs) -> UUID: """ Obtains the capacity Id for a given capacity name. 
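A short sketch of the REST-backed resolvers above, runnable in a Fabric notebook (or with a service principal token provider configured); the workspace and item names are placeholders.

from sempy_labs._helper_functions import (
    resolve_workspace_name_and_id,
    resolve_item_name_and_id,
)

# Accepts either a name or a UUID; both paths go through the Fabric REST API.
ws_name, ws_id = resolve_workspace_name_and_id("Dev Workspace")

# When passing a name, 'type' is required so the items endpoint can be filtered.
nb_name, nb_id = resolve_item_name_and_id(
    item="Daily Refresh", type="Notebook", workspace=ws_id
)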
Parameters ---------- - capacity_name : str, default=None - The capacity name. + capacity : str | uuid.UUID, default=None + The capacity name or ID. Defaults to None which resolves to the capacity id of the workspace of the attached lakehouse or if no lakehouse attached, resolves to the capacity name of the workspace of the notebook. @@ -924,17 +1214,24 @@ def resolve_capacity_id(capacity_name: Optional[str] = None) -> UUID: uuid.UUID The capacity Id. """ + from sempy_labs._capacities import list_capacities - if capacity_name is None: + if "capacity_name" in kwargs: + capacity = kwargs["capacity_name"] + print( + f"{icons.warning} The 'capacity_name' parameter is deprecated. Please use 'capacity' instead." + ) + + if capacity is None: return get_capacity_id() + if _is_valid_uuid(capacity): + return capacity - dfC = fabric.list_capacities() - dfC_filt = dfC[dfC["Display Name"] == capacity_name] + dfC = list_capacities() + dfC_filt = dfC[dfC["Display Name"] == capacity] if dfC_filt.empty: - raise ValueError( - f"{icons.red_dot} The '{capacity_name}' capacity does not exist." - ) + raise ValueError(f"{icons.red_dot} The '{capacity}' capacity does not exist.") return dfC_filt["Id"].iloc[0] @@ -1057,10 +1354,8 @@ def get_token( import notebookutils - token = notebookutils.credentials.getToken(scopes) - access_token = AccessToken(token, 0) - - return access_token + token = notebookutils.credentials.getToken("storage") + return AccessToken(token, 0) def _get_adls_client(account_name): @@ -1069,11 +1364,21 @@ def _get_adls_client(account_name): account_url = f"https://{account_name}.dfs.core.windows.net" - service_client = DataLakeServiceClient( - account_url, credential=FabricTokenCredential() + return DataLakeServiceClient(account_url, credential=FabricTokenCredential()) + + +def _get_blob_client(workspace_id: UUID, item_id: UUID): + + from azure.storage.blob import BlobServiceClient + + endpoint = _get_fabric_context_setting(name="trident.onelake.endpoint").replace( + ".dfs.", ".blob." ) + url = f"https://{endpoint}/{workspace_id}/{item_id}" + + # account_url = f"https://{account_name}.blob.core.windows.net" - return service_client + return BlobServiceClient(url, credential=FabricTokenCredential()) def resolve_warehouse_id( @@ -1097,12 +1402,7 @@ def resolve_warehouse_id( The warehouse Id. """ - if _is_valid_uuid(warehouse): - return warehouse - else: - return fabric.resolve_item_id( - item_name=warehouse, type="Warehouse", workspace=workspace - ) + return resolve_item_id(item=warehouse, type="Warehouse", workspace=workspace) def get_language_codes(languages: str | List[str]): @@ -1162,14 +1462,14 @@ def convert_to_alphanumeric_lowercase(input_string): def resolve_environment_id( - environment: str, workspace: Optional[str | UUID] = None + environment: str | UUID, workspace: Optional[str | UUID] = None ) -> UUID: """ Obtains the environment Id for a given environment. Parameters ---------- - environment: str + environment: str | uuid.UUID Name of the environment. workspace : str | uuid.UUID, default=None The Fabric workspace name or ID in which the semantic model resides. @@ -1178,13 +1478,11 @@ def resolve_environment_id( Returns ------- - UUID + uuid.UUID The environment Id. 
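A usage sketch for the reworked resolve_capacity_id above; the capacity name is a placeholder. Old callers that pass capacity_name still resolve but now emit a deprecation warning.

from sempy_labs._helper_functions import resolve_capacity_id

# Name or UUID both work; a UUID is returned as-is without a lookup.
cap_id = resolve_capacity_id(capacity="P1 Capacity")

# Deprecated keyword, kept for backwards compatibility.
cap_id = resolve_capacity_id(capacity_name="P1 Capacity")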
""" - return fabric.resolve_item_id( - item_name=environment, type="Environment", workspace=workspace - ) + return resolve_item_id(item=environment, type="Environment", workspace=workspace) def _make_clickable(val): @@ -1216,14 +1514,16 @@ def convert_to_friendly_case(text: str) -> str: return text -def resolve_notebook_id(notebook: str, workspace: Optional[str | UUID] = None) -> UUID: +def resolve_notebook_id( + notebook: str | UUID, workspace: Optional[str | UUID] = None +) -> UUID: """ Obtains the notebook Id for a given notebook. Parameters ---------- - notebook: str - Name of the notebook. + notebook: str | uuid.UUID + Name or ID of the notebook. workspace : str | uuid.UUID, default=None The Fabric workspace name or ID in which the semantic model resides. Defaults to None which resolves to the workspace of the attached lakehouse @@ -1235,9 +1535,7 @@ def resolve_notebook_id(notebook: str, workspace: Optional[str | UUID] = None) - The notebook Id. """ - return fabric.resolve_item_id( - item_name=notebook, type="Notebook", workspace=workspace - ) + return resolve_item_id(item=notebook, type="Notebook", workspace=workspace) def generate_guid(): @@ -1247,32 +1545,108 @@ def generate_guid(): def _get_column_aggregate( table_name: str, - column_name: str = "RunId", + column_name: str | List[str] = "RunId", lakehouse: Optional[str | UUID] = None, workspace: Optional[str | UUID] = None, function: str = "max", default_value: int = 0, -) -> int: + schema_name: Optional[str] = None, +) -> int | Dict[str, int]: - from pyspark.sql.functions import approx_count_distinct - from pyspark.sql import functions as F + workspace_id = resolve_workspace_id(workspace) + lakehouse_id = resolve_lakehouse_id(lakehouse, workspace_id) + path = create_abfss_path(lakehouse_id, workspace_id, table_name, schema_name) + df = _read_delta_table(path) - function = function.upper() - (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) - lakehouse_id = resolve_lakehouse_id(lakehouse, workspace) - path = create_abfss_path(lakehouse_id, workspace_id, table_name) + function = function.lower() - spark = _create_spark_session() - df = spark.read.format("delta").load(path) + if isinstance(column_name, str): + column_name = [column_name] + + if _pure_python_notebook(): + import polars as pl + + if not isinstance(df, pd.DataFrame): + df.to_pandas() + + df = pl.from_pandas(df) + + def get_expr(col): + col_dtype = df.schema[col] + + if "approx" in function: + return pl.col(col).unique().count().alias(col) + elif "distinct" in function: + if col_dtype == pl.Decimal: + return pl.col(col).cast(pl.Float64).n_unique().alias(col) + else: + return pl.col(col).n_unique().alias(col) + elif function == "sum": + return pl.col(col).sum().alias(col) + elif function == "min": + return pl.col(col).min().alias(col) + elif function == "max": + return pl.col(col).max().alias(col) + elif function == "count": + return pl.col(col).count().alias(col) + elif function in {"avg", "mean"}: + return pl.col(col).mean().alias(col) + else: + raise ValueError(f"Unsupported function: {function}") + + exprs = [get_expr(col) for col in column_name] + aggs = df.select(exprs).to_dict(as_series=False) - if function in {"COUNTDISTINCT", "DISTINCTCOUNT"}: - result = df.select(F.count_distinct(F.col(column_name))) - elif "APPROX" in function: - result = df.select(approx_count_distinct(column_name)) + if len(column_name) == 1: + result = aggs[column_name[0]][0] or default_value + else: + result = {col: aggs[col][0] for col in column_name} else: - 
result = df.selectExpr(f"{function}({column_name})") + from pyspark.sql.functions import ( + count, + sum, + min, + max, + avg, + approx_count_distinct, + countDistinct, + ) + + result = None + if "approx" in function: + spark_func = approx_count_distinct + elif "distinct" in function: + spark_func = countDistinct + elif function == "count": + spark_func = count + elif function == "sum": + spark_func = sum + elif function == "min": + spark_func = min + elif function == "max": + spark_func = max + elif function == "avg": + spark_func = avg + else: + raise ValueError(f"Unsupported function: {function}") + + agg_exprs = [] + for col in column_name: + agg_exprs.append(spark_func(col).alias(col)) + + aggs = df.agg(*agg_exprs).collect()[0] + if len(column_name) == 1: + result = aggs[0] or default_value + else: + result = {col: aggs[col] for col in column_name} - return result.collect()[0][0] or default_value + return result + + +def _create_spark_dataframe(df: pd.DataFrame): + + spark = _create_spark_session() + return spark.createDataFrame(df) def _make_list_unique(my_list): @@ -1367,6 +1741,9 @@ def _process_and_display_chart(df, title, widget): df["Start"] = df["Start"] - Offset df["End"] = df["End"] - Offset + unique_objects = df["Object Name"].nunique() + height = min(max(400, unique_objects * 30), 1000) + # Vega-Lite spec for Gantt chart spec = ( """{ @@ -1376,7 +1753,9 @@ def _process_and_display_chart(df, title, widget): + df.to_json(orient="records") + """ }, "width": 700, - "height": 400, + "height": """ + + str(height) + + """, "mark": "bar", "encoding": { "y": { @@ -1436,6 +1815,8 @@ def _convert_data_type(input_data_type: str) -> str: "date": "DateTime", "double": "Double", "float": "Double", + "binary": "Boolean", + "long": "Int64", } if "decimal" in input_data_type: @@ -1490,19 +1871,23 @@ def _base_api( lro_return_json: bool = False, lro_return_status_code: bool = False, ): - + import notebookutils from sempy_labs._authentication import _get_headers if (lro_return_json or lro_return_status_code) and status_codes is None: status_codes = [200, 202] + def get_token(audience="pbi"): + return notebookutils.credentials.getToken(audience) + if isinstance(status_codes, int): status_codes = [status_codes] if client == "fabric": - c = fabric.FabricRestClient() + c = fabric.FabricRestClient(token_provider=get_token) elif client == "fabric_sp": - c = fabric.FabricRestClient(token_provider=auth.token_provider.get()) + token = auth.token_provider.get() or get_token + c = fabric.FabricRestClient(token_provider=token) elif client in ["azure", "graph"]: pass else: @@ -1523,9 +1908,15 @@ def _base_api( raise NotImplementedError else: headers = _get_headers(auth.token_provider.get(), audience=client) + if client == "graph": + url = f"https://graph.microsoft.com/v1.0/{request}" + elif client == "azure": + url = request + else: + raise NotImplementedError response = requests.request( method.upper(), - f"https://graph.microsoft.com/v1.0/{request}", + url, headers=headers, json=payload, ) @@ -1581,6 +1972,18 @@ def _update_dataframe_datatypes(dataframe: pd.DataFrame, column_map: dict): dataframe[column] = dataframe[column].fillna(0).astype(int) elif data_type in ["str", "string"]: dataframe[column] = dataframe[column].astype(str) + # Avoid having empty lists or lists with a value of None. 
+ elif data_type in ["list"]: + dataframe[column] = dataframe[column].apply( + lambda x: ( + None + if (type(x) == list and len(x) == 1 and x[0] == None) + or (type(x) == list and len(x) == 0) + else x + ) + ) + elif data_type in ["dict"]: + dataframe[column] = dataframe[column] else: raise NotImplementedError @@ -1617,18 +2020,58 @@ def _create_spark_session(): return SparkSession.builder.getOrCreate() -def _read_delta_table(path: str): +def _get_delta_table(path: str) -> str: + + from delta import DeltaTable spark = _create_spark_session() - return spark.read.format("delta").load(path) + return DeltaTable.forPath(spark, path) -def _delta_table_row_count(table_name: str) -> int: +def _read_delta_table(path: str, to_pandas: bool = True, to_df: bool = False): - spark = _create_spark_session() + if _pure_python_notebook(): + from deltalake import DeltaTable + + df = DeltaTable(table_uri=path) + if to_pandas: + df = df.to_pandas() + else: + spark = _create_spark_session() + df = spark.read.format("delta").load(path) + if to_df: + df = df.toDF() + + return df + + +def _read_delta_table_history(path) -> pd.DataFrame: + + if _pure_python_notebook(): + from deltalake import DeltaTable + + df = pd.DataFrame(DeltaTable(table_uri=path).history()) + else: + from delta import DeltaTable + + spark = _create_spark_session() + delta_table = DeltaTable.forPath(spark, path) + df = delta_table.history().toPandas() + + return df + + +def _delta_table_row_count(path: str) -> int: + + if _pure_python_notebook(): + from deltalake import DeltaTable - return spark.table(table_name).count() + dt = DeltaTable(path) + arrow_table = dt.to_pyarrow_table() + return arrow_table.num_rows + else: + return _read_delta_table(path).count() def _run_spark_sql_query(query): @@ -1638,7 +2081,9 @@ def _run_spark_sql_query(query): return spark.sql(query) -def _mount(lakehouse, workspace) -> str: +def _mount( + lakehouse: Optional[str | UUID] = None, workspace: Optional[str | UUID] = None +) -> str: """ Mounts a lakehouse to a notebook if it is not already mounted. Returns the local path to the lakehouse. 
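A sketch of the extended _get_column_aggregate above: a single column returns a scalar, a list of columns returns a dict keyed by column name. Table, column and lakehouse names are placeholders, and the polars branch is used automatically in pure-Python notebooks.

from sempy_labs._helper_functions import _get_column_aggregate

# Scalar result for a single column (default function is 'max').
last_run = _get_column_aggregate(table_name="RefreshLog", column_name="RunId")

# Dict result when several columns are aggregated with the same function.
totals = _get_column_aggregate(
    table_name="FactSales",
    column_name=["SalesAmount", "Quantity"],
    function="sum",
    lakehouse="Gold",
    schema_name="dbo",
)
# e.g. {'SalesAmount': 1234.5, 'Quantity': 678}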
""" @@ -1650,6 +2095,16 @@ def _mount(lakehouse, workspace) -> str: lakehouse=lakehouse, workspace=workspace ) + # Hide display mounts + current_setting = "" + try: + current_setting = notebookutils.conf.get( + "spark.notebookutils.displaymountpoint.enabled" + ) + notebookutils.conf.set("spark.notebookutils.displaymountpoint.enabled", "false") + except Exception: + pass + lake_path = create_abfss_path(lakehouse_id, workspace_id) mounts = notebookutils.fs.mounts() mount_point = f"/{workspace_name.replace(' ', '')}{lakehouse_name.replace(' ', '')}" @@ -1661,8 +2116,322 @@ def _mount(lakehouse, workspace) -> str: ) mounts = notebookutils.fs.mounts() + + # Set display mounts to original setting + try: + if current_setting != "false": + notebookutils.conf.set( + "spark.notebookutils.displaymountpoint.enabled", "true" + ) + except Exception: + pass + local_path = next( i.get("localPath") for i in mounts if i.get("source") == lake_path ) return local_path + + +def _get_or_create_workspace( + workspace: str, + capacity: Optional[str | UUID] = None, + description: Optional[str] = None, +) -> Tuple[str, UUID]: + + capacity_id = resolve_capacity_id(capacity) + dfW = fabric.list_workspaces() + dfW_filt_name = dfW[dfW["Name"] == workspace] + dfW_filt_id = dfW[dfW["Id"] == workspace] + + # Workspace already exists + if (not dfW_filt_name.empty) or (not dfW_filt_id.empty): + print(f"{icons.green_dot} The '{workspace}' workspace already exists.") + (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) + return (workspace_name, workspace_id) + + # Do not create workspace with name of an ID + if _is_valid_uuid(workspace): + raise ValueError(f"{icons.warning} Must enter a workspace name, not an ID.") + + print(f"{icons.in_progress} Creating the '{workspace}' workspace...") + workspace_id = fabric.create_workspace( + display_name=workspace, capacity_id=capacity_id, description=description + ) + print( + f"{icons.green_dot} The '{workspace}' workspace has been successfully created." + ) + + return (workspace, workspace_id) + + +def _get_or_create_lakehouse( + lakehouse: str, + workspace: Optional[str | UUID] = None, + description: Optional[str] = None, +) -> Tuple[str, UUID]: + + (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) + + dfI = fabric.list_items(type="Lakehouse", workspace=workspace) + dfI_filt_name = dfI[dfI["Display Name"] == lakehouse] + dfI_filt_id = dfI[dfI["Id"] == lakehouse] + + if (not dfI_filt_name.empty) or (not dfI_filt_id.empty): + print(f"{icons.green_dot} The '{lakehouse}' lakehouse already exists.") + (lakehouse_name, lakehouse_id) = resolve_lakehouse_name_and_id( + lakehouse=lakehouse, workspace=workspace + ) + return (lakehouse_name, lakehouse_id) + if _is_valid_uuid(lakehouse): + raise ValueError(f"{icons.warning} Must enter a lakehouse name, not an ID.") + + print(f"{icons.in_progress} Creating the '{lakehouse}' lakehouse...") + lakehouse_id = fabric.create_lakehouse( + display_name=lakehouse, workspace=workspace, description=description + ) + print( + f"{icons.green_dot} The '{lakehouse}' lakehouse has been successfully created within the '{workspace_name}' workspace." 
+ ) + + return (lakehouse, lakehouse_id) + + +def _get_or_create_warehouse( + warehouse: str, + workspace: Optional[str | UUID] = None, + description: Optional[str] = None, +) -> Tuple[str, UUID]: + + from sempy_labs._warehouses import create_warehouse + + (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) + + dfI = fabric.list_items(type="Warehouse", workspace=workspace) + dfI_filt_name = dfI[dfI["Display Name"] == warehouse] + dfI_filt_id = dfI[dfI["Id"] == warehouse] + + if (not dfI_filt_name.empty) or (not dfI_filt_id.empty): + print(f"{icons.green_dot} The '{warehouse}' warehouse already exists.") + (warehouse_name, warehouse_id) = resolve_item_name_and_id( + warehouse=warehouse, type="Warehouse", workspace=workspace + ) + return (warehouse_name, warehouse_id) + if _is_valid_uuid(warehouse): + raise ValueError(f"{icons.warning} Must enter a warehouse name, not an ID.") + + print(f"{icons.in_progress} Creating the '{warehouse}' warehouse...") + warehouse_id = create_warehouse( + display_name=warehouse, workspace=workspace, description=description + ) + print( + f"{icons.green_dot} The '{warehouse}' warehouse has been successfully created within the '{workspace_name}' workspace." + ) + + return (warehouse, warehouse_id) + + +def _xml_to_dict(element): + data = {element.tag: {} if element.attrib else None} + children = list(element) + if children: + temp_dict = {} + for child in children: + child_dict = _xml_to_dict(child) + for key, value in child_dict.items(): + if key in temp_dict: + if isinstance(temp_dict[key], list): + temp_dict[key].append(value) + else: + temp_dict[key] = [temp_dict[key], value] + else: + temp_dict[key] = value + data[element.tag] = temp_dict + else: + data[element.tag] = ( + element.text.strip() if element.text and element.text.strip() else None + ) + return data + + +def file_exists(file_path: str) -> bool: + """ + Check if a file exists in the given path. + + Parameters + ---------- + file_path : str + The path to the file. + + Returns + ------- + bool + True if the file exists, False otherwise. + """ + + import notebookutils + + return len(notebookutils.fs.ls(file_path)) > 0 + + +def generate_number_guid(): + + guid = uuid.uuid4() + return str(guid.int & ((1 << 64) - 1)) + + +def get_url_content(url: str): + + if "github.com" in url and "/blob/" in url: + url = url.replace("github.com", "raw.githubusercontent.com") + url = url.replace("/blob/", "/") + + response = requests.get(url) + if response.ok: + try: + data = response.json() # Only works if the response is valid JSON + except ValueError: + data = response.text # Fallback: get raw text content + return data + else: + print(f"Failed to fetch raw content: {response.status_code}") + + +def generate_hex(length: int = 10) -> str: + """ + Generate a random hex string of the specified length. Used for generating IDs for report objects (page, visual, bookmark etc.). 
+ """ + import secrets + + return secrets.token_hex(length) + + +def decode_payload(payload): + + if is_base64(payload): + try: + decoded_payload = json.loads(base64.b64decode(payload).decode("utf-8")) + except Exception: + decoded_payload = base64.b64decode(payload) + elif isinstance(payload, dict): + decoded_payload = payload + else: + raise ValueError("Payload must be a dictionary or a base64 encoded value.") + + return decoded_payload + + +def is_base64(s): + try: + # Add padding if needed + s_padded = s + "=" * (-len(s) % 4) + decoded = base64.b64decode(s_padded, validate=True) + # Optional: check if re-encoding gives the original (excluding padding) + return base64.b64encode(decoded).decode().rstrip("=") == s.rstrip("=") + except Exception: + return False + + +def get_jsonpath_value( + data, path, default=None, remove_quotes=False, fix_true: bool = False +): + matches = parse(path).find(data) + result = matches[0].value if matches else default + if result and remove_quotes and isinstance(result, str): + if result.startswith("'") and result.endswith("'"): + result = result[1:-1] + if fix_true and isinstance(result, str): + if result.lower() == "true": + result = True + elif result.lower() == "false": + result = False + return result + + +def set_json_value(payload: dict, json_path: str, json_value: str | dict | List): + + jsonpath_expr = parse(json_path) + matches = jsonpath_expr.find(payload) + + if matches: + # Update all matches + for match in matches: + parent = match.context.value + path = match.path + if isinstance(path, Fields): + parent[path.fields[0]] = json_value + elif isinstance(path, Index): + parent[path.index] = json_value + else: + # Handle creation + parts = json_path.lstrip("$").strip(".").split(".") + current = payload + + for i, part in enumerate(parts): + is_last = i == len(parts) - 1 + + # Detect list syntax like "lockAspect[*]" + list_match = re.match(r"(\w+)\[\*\]", part) + if list_match: + list_key = list_match.group(1) + if list_key not in current or not isinstance(current[list_key], list): + # Initialize with one dict element + current[list_key] = [{}] + + for item in current[list_key]: + if is_last: + # Last part, assign value + item = json_value + else: + # Proceed to next level + if not isinstance(item, dict): + raise ValueError( + f"Expected dict in list for key '{list_key}', got {type(item)}" + ) + next_part = ".".join(parts[i + 1 :]) + set_json_value(item, "$." + next_part, json_value) + return payload + else: + if part not in current or not isinstance(current[part], dict): + current[part] = {} if not is_last else json_value + elif is_last: + current[part] = json_value + current = current[part] + + return payload + + +def remove_json_value(path: str, payload: dict, json_path: str, verbose: bool = True): + + if not isinstance(payload, dict): + raise ValueError( + f"{icons.red_dot} Cannot apply json_path to non-dictionary payload in '{path}'." + ) + + jsonpath_expr = parse(json_path) + matches = jsonpath_expr.find(payload) + + if not matches and verbose: + print( + f"{icons.red_dot} No match found for '{json_path}' in '{path}'. Skipping." 
+ ) + return payload + + for match in matches: + parent = match.context.value + path_expr = match.path + + if isinstance(path_expr, Fields): + key = path_expr.fields[0] + if key in parent: + del parent[key] + if verbose: + print(f"{icons.green_dot} Removed key '{key}' from '{path}'.") + elif isinstance(path_expr, Index): + index = path_expr.index + if isinstance(parent, list) and 0 <= index < len(parent): + parent.pop(index) + if verbose: + print(f"{icons.green_dot} Removed index [{index}] from '{path}'.") + + return payload diff --git a/src/sempy_labs/_icons.py b/src/sempy_labs/_icons.py index 6d9a7a49..c2ec7c7b 100644 --- a/src/sempy_labs/_icons.py +++ b/src/sempy_labs/_icons.py @@ -1,6 +1,6 @@ -green_dot = "\U0001F7E2" -yellow_dot = "\U0001F7E1" -red_dot = "\U0001F534" +green_dot = "\U0001f7e2" +yellow_dot = "\U0001f7e1" +red_dot = "\U0001f534" in_progress = "⌛" checked = "\u2611" unchecked = "\u2610" @@ -8,11 +8,11 @@ end_bold = "\033[0m" bullet = "\u2022" warning = "⚠️" -error = "\u274C" +error = "\u274c" info = "ℹ️" measure_icon = "\u2211" -table_icon = "\u229E" -column_icon = "\u229F" +table_icon = "\u229e" +column_icon = "\u229f" model_bpa_name = "ModelBPA" report_bpa_name = "ReportBPA" severity_mapping = {warning: "Warning", error: "Error", info: "Info"} diff --git a/src/sempy_labs/_job_scheduler.py b/src/sempy_labs/_job_scheduler.py index cc876f1f..01e4fe3a 100644 --- a/src/sempy_labs/_job_scheduler.py +++ b/src/sempy_labs/_job_scheduler.py @@ -1,6 +1,6 @@ from sempy._utils._log import log import pandas as pd -from typing import Optional +from typing import Optional, List from sempy_labs._helper_functions import ( resolve_workspace_name_and_id, resolve_item_name_and_id, @@ -189,7 +189,7 @@ def run_on_demand_item_job( Parameters ---------- item : str | uuid.UUID - The item name or ID + The item name or ID. type : str, default=None The item `type `_. If specifying the item name as the item, the item type is required. job_type : str, default="DefaultJob" @@ -213,3 +213,227 @@ def run_on_demand_item_job( ) print(f"{icons.green_dot} The '{item_name}' {type.lower()} has been executed.") + + +def create_item_schedule_cron( + item: str | UUID, + type: str, + start_date_time: str, + end_date_time: str, + local_time_zone: str, + job_type: str = "DefaultJob", + interval_minutes: int = 10, + enabled: bool = True, + workspace: Optional[str | UUID] = None, +): + """ + Create a new schedule for an item based on a `chronological time `_. + + This is a wrapper function for the following API: `Job Scheduler - Create Item Schedule `_. + + Parameters + ---------- + item : str | uuid.UUID + The item name or ID. + type : str + The item `type `_. If specifying the item name as the item, the item type is required. + start_date_time: str + The start date and time of the schedule. Example: "2024-04-28T00:00:00". + end_date_time: str + The end date and time of the schedule. Must be later than the start_date_time. Example: "2024-04-30T23:59:00". + local_time_zone: str + The `time zone `_ of the schedule. Example: "Central Standard Time". + job_type : str, default="DefaultJob" + The job type. + interval_minutes: int, default=10 + The schedule interval (in minutes). + enabled: bool, default=True + Whether the schedule is enabled. + workspace : str | uuid.UUID, default=None + The workspace name or ID. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. 
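A usage sketch matching the create_item_schedule_cron signature documented above; the item, workspace and time zone values are placeholders and the default job type is kept.

from sempy_labs._job_scheduler import create_item_schedule_cron

# Run the 'Nightly ETL' notebook every 30 minutes within the schedule window.
create_item_schedule_cron(
    item="Nightly ETL",
    type="Notebook",
    start_date_time="2024-04-28T00:00:00",
    end_date_time="2024-04-30T23:59:00",
    local_time_zone="Central Standard Time",
    interval_minutes=30,
    workspace="Dev Workspace",
)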
+    """
+
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+    (item_name, item_id) = resolve_item_name_and_id(
+        item=item, type=type, workspace=workspace
+    )
+
+    payload = {
+        "enabled": enabled,
+        "configuration": {
+            "startDateTime": start_date_time,
+            "endDateTime": end_date_time,
+            "localTimeZoneId": local_time_zone,
+            "type": "Cron",
+            "interval": interval_minutes,
+        },
+    }
+
+    _base_api(
+        request=f"v1/workspaces/{workspace_id}/items/{item_id}/jobs/{job_type}/schedules",
+        method="post",
+        payload=payload,
+        status_codes=201,
+    )
+
+    print(
+        f"{icons.green_dot} The schedule for the '{item_name}' {type.lower()} has been created."
+    )
+
+
+def create_item_schedule_daily(
+    item: str | UUID,
+    type: str,
+    start_date_time: str,
+    end_date_time: str,
+    local_time_zone: str,
+    times: List[str],
+    job_type: str = "DefaultJob",
+    enabled: bool = True,
+    workspace: Optional[str | UUID] = None,
+):
+    """
+    Create a new daily schedule for an item.
+
+    This is a wrapper function for the following API: `Job Scheduler - Create Item Schedule `_.
+
+    Parameters
+    ----------
+    item : str | uuid.UUID
+        The item name or ID.
+    type : str
+        The item `type `_. If specifying the item name as the item, the item type is required.
+    start_date_time: str
+        The start date and time of the schedule. Example: "2024-04-28T00:00:00".
+    end_date_time: str
+        The end date and time of the schedule. Must be later than the start_date_time. Example: "2024-04-30T23:59:00".
+    local_time_zone: str
+        The `time zone `_ of the schedule. Example: "Central Standard Time".
+    times : List[str]
+        A list of time slots in hh:mm format, at most 100 elements are allowed. Example: ["00:00", "12:00"].
+    job_type : str, default="DefaultJob"
+        The job type.
+    enabled: bool, default=True
+        Whether the schedule is enabled.
+    workspace : str | uuid.UUID, default=None
+        The workspace name or ID.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    """
+
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+    (item_name, item_id) = resolve_item_name_and_id(
+        item=item, type=type, workspace=workspace
+    )
+
+    payload = {
+        "enabled": enabled,
+        "configuration": {
+            "startDateTime": start_date_time,
+            "endDateTime": end_date_time,
+            "localTimeZoneId": local_time_zone,
+            "type": "Daily",
+            "times": times,
+        },
+    }
+
+    _base_api(
+        request=f"v1/workspaces/{workspace_id}/items/{item_id}/jobs/{job_type}/schedules",
+        method="post",
+        payload=payload,
+        status_codes=201,
+    )
+
+    print(
+        f"{icons.green_dot} The schedule for the '{item_name}' {type.lower()} has been created."
+    )
+
+
+def create_item_schedule_weekly(
+    item: str | UUID,
+    type: str,
+    start_date_time: str,
+    end_date_time: str,
+    local_time_zone: str,
+    times: List[str],
+    weekdays: List[str],
+    job_type: str = "DefaultJob",
+    enabled: bool = True,
+    workspace: Optional[str | UUID] = None,
+):
+    """
+    Create a new weekly schedule for an item.
+
+    This is a wrapper function for the following API: `Job Scheduler - Create Item Schedule `_.
+
+    Parameters
+    ----------
+    item : str | uuid.UUID
+        The item name or ID.
+    type : str
+        The item `type `_. If specifying the item name as the item, the item type is required.
+    start_date_time: str
+        The start date and time of the schedule. Example: "2024-04-28T00:00:00".
+    end_date_time: str
+        The end date and time of the schedule. Must be later than the start_date_time. Example: "2024-04-30T23:59:00". 
+ local_time_zone: str + The `time zone `_ of the schedule. Example: "Central Standard Time". + times : List[str] + A list of time slots in hh:mm format, at most 100 elements are allowed. Example: ["00:00", "12:00"]. + weekdays : List[str] + A list of weekdays. Example: ["Monday", "Tuesday"]. + job_type : str, default="DefaultJob" + The job type. + enabled: bool, default=True + Whether the schedule is enabled. + workspace : str | uuid.UUID, default=None + The workspace name or ID. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + """ + + (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) + (item_name, item_id) = resolve_item_name_and_id( + item=item, type=type, workspace=workspace + ) + + weekdays = [w.capitalize() for w in weekdays] + weekday_list = [ + "Sunday", + "Monday", + "Tuesday", + "Wednesday", + "Thursday", + "Friday", + "Saturday", + ] + for weekday in weekdays: + if weekday not in weekday_list: + raise ValueError( + f"{icons.red_dot} Invalid weekday: {weekday}. Must be one of {weekday_list}." + ) + + payload = { + "enabled": enabled, + "configuration": { + "startDateTime": start_date_time, + "endDateTime": end_date_time, + "localTimeZoneId": local_time_zone, + "type": "Weekly", + "times": times, + "weekdays": weekdays, + }, + } + + _base_api( + request=f"v1/workspaces/{workspace_id}/items/{item_id}/jobs/{job_type}/schedules", + method="post", + payload=payload, + status_codes=201, + ) + + print( + f"{icons.green_dot} The schedule for the '{item_name}' {type.lower()} has been created." + ) diff --git a/src/sempy_labs/_kql_databases.py b/src/sempy_labs/_kql_databases.py index 54d5e2a0..3cb3f34f 100644 --- a/src/sempy_labs/_kql_databases.py +++ b/src/sempy_labs/_kql_databases.py @@ -1,13 +1,16 @@ -import sempy.fabric as fabric import pandas as pd -import sempy_labs._icons as icons from typing import Optional from sempy_labs._helper_functions import ( resolve_workspace_name_and_id, _base_api, _create_dataframe, + delete_item, + create_item, + resolve_item_id, + resolve_workspace_id, ) from uuid import UUID +import sempy_labs._icons as icons def list_kql_databases(workspace: Optional[str | UUID] = None) -> pd.DataFrame: @@ -16,6 +19,8 @@ def list_kql_databases(workspace: Optional[str | UUID] = None) -> pd.DataFrame: This is a wrapper function for the following API: `Items - List KQL Databases `_. + Service Principal Authentication is supported (see `here `_ for examples). + Parameters ---------- workspace : str | uuid.UUID, default=None @@ -43,7 +48,9 @@ def list_kql_databases(workspace: Optional[str | UUID] = None) -> pd.DataFrame: (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) responses = _base_api( - request=f"v1/workspaces/{workspace_id}/kqlDatabases", uses_pagination=True + request=f"v1/workspaces/{workspace_id}/kqlDatabases", + uses_pagination=True, + client="fabric_sp", ) for r in responses: @@ -64,7 +71,7 @@ def list_kql_databases(workspace: Optional[str | UUID] = None) -> pd.DataFrame: return df -def create_kql_database( +def _create_kql_database( name: str, description: Optional[str] = None, workspace: Optional[str | UUID] = None ): """ @@ -84,27 +91,16 @@ def create_kql_database( or if no lakehouse attached, resolves to the workspace of the notebook. 
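And a sketch of the weekly variant defined above, which validates the weekday names before posting the schedule; all values are placeholders.

from sempy_labs._job_scheduler import create_item_schedule_weekly

create_item_schedule_weekly(
    item="Nightly ETL",
    type="Notebook",
    start_date_time="2024-04-28T00:00:00",
    end_date_time="2024-06-30T23:59:00",
    local_time_zone="Central Standard Time",
    times=["06:00", "18:00"],
    weekdays=["monday", "wednesday", "friday"],  # capitalization is normalized by the function
    workspace="Dev Workspace",
)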
""" - (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) - - payload = {"displayName": name} - - if description: - payload["description"] = description - - _base_api( - request=f"v1/workspaces/{workspace_id}/kqlDatabases", - method="post", - payload=payload, - status_codes=[201, 202], - lro_return_status_code=True, - ) - - print( - f"{icons.green_dot} The '{name}' KQL database has been created within the '{workspace_name}' workspace." + create_item( + name=name, description=description, type="KQLDatabase", workspace=workspace ) -def delete_kql_database(name: str, workspace: Optional[str | UUID] = None): +def delete_kql_database( + kql_database: str | UUID, + workspace: Optional[str | UUID] = None, + **kwargs, +): """ Deletes a KQL database. @@ -112,23 +108,34 @@ def delete_kql_database(name: str, workspace: Optional[str | UUID] = None): Parameters ---------- - name: str - Name of the KQL database. + kql_database: str | uuid.UUID + Name or ID of the KQL database. workspace : str | uuid.UUID, default=None The Fabric workspace name or ID. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. """ - (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) - kql_database_id = fabric.resolve_item_id( - item_name=name, type="KQLDatabase", workspace=workspace_id - ) + if "name" in kwargs: + kql_database = kwargs["name"] + print( + f"{icons.warning} The 'name' parameter is deprecated. Please use 'kql_database' instead." + ) + + delete_item(item=kql_database, type="KQLDatabase", workspace=workspace) + - _base_api( - request=f"/v1/workspaces/{workspace_id}/kqlDatabases/{kql_database_id}", - method="delete", +def _resolve_cluster_uri( + kql_database: str | UUID, workspace: Optional[str | UUID] = None +) -> str: + + workspace_id = resolve_workspace_id(workspace=workspace) + item_id = resolve_item_id( + item=kql_database, type="KQLDatabase", workspace=workspace ) - print( - f"{icons.green_dot} The '{name}' KQL database within the '{workspace_name}' workspace has been deleted." + response = _base_api( + request=f"/v1/workspaces/{workspace_id}/kqlDatabases/{item_id}", + client="fabric_sp", ) + + return response.json().get("properties", {}).get("queryServiceUri") diff --git a/src/sempy_labs/_kql_querysets.py b/src/sempy_labs/_kql_querysets.py index f68c974f..6f55d74d 100644 --- a/src/sempy_labs/_kql_querysets.py +++ b/src/sempy_labs/_kql_querysets.py @@ -1,4 +1,3 @@ -import sempy.fabric as fabric import pandas as pd import sempy_labs._icons as icons from typing import Optional @@ -6,6 +5,8 @@ resolve_workspace_name_and_id, _base_api, _create_dataframe, + delete_item, + create_item, ) from uuid import UUID @@ -74,27 +75,14 @@ def create_kql_queryset( or if no lakehouse attached, resolves to the workspace of the notebook. """ - (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) - - payload = {"displayName": name} - - if description: - payload["description"] = description - - _base_api( - request=f"v1/workspaces/{workspace_id}/kqlQuerysets", - method="post", - payload=payload, - status_codes=[201, 202], - lro_return_status_code=True, - ) - - print( - f"{icons.green_dot} The '{name}' KQL queryset has been created within the '{workspace_name}' workspace." 
+ create_item( + name=name, description=description, type="KQLQueryset", workspace=workspace ) -def delete_kql_queryset(name: str, workspace: Optional[str | UUID] = None): +def delete_kql_queryset( + kql_queryset: str | UUID, workspace: Optional[str | UUID] = None, **kwargs +): """ Deletes a KQL queryset. @@ -102,23 +90,18 @@ def delete_kql_queryset(name: str, workspace: Optional[str | UUID] = None): Parameters ---------- - name: str - Name of the KQL queryset. + kql_queryset: str | uuid.UUID + Name or ID of the KQL queryset. workspace : str | uuid.UUID, default=None The Fabric workspace name or ID. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. """ - (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) - kql_database_id = fabric.resolve_item_id( - item_name=name, type="KQLQueryset", workspace=workspace_id - ) + if "name" in kwargs: + kql_queryset = kwargs["name"] + print( + f"{icons.warning} The 'name' parameter is deprecated. Please use 'kql_queryset' instead." + ) - _base_api( - request=f"/v1/workspaces/{workspace_id}/kqlQuerysets/{kql_database_id}", - method="delete", - ) - print( - f"{icons.green_dot} The '{name}' KQL queryset within the '{workspace_name}' workspace has been deleted." - ) + delete_item(item=kql_queryset, type="KQLQueryset", workspace=workspace) diff --git a/src/sempy_labs/_kusto.py b/src/sempy_labs/_kusto.py new file mode 100644 index 00000000..2e217762 --- /dev/null +++ b/src/sempy_labs/_kusto.py @@ -0,0 +1,137 @@ +import requests +import pandas as pd +from sempy.fabric.exceptions import FabricHTTPException +from sempy._utils._log import log +import sempy_labs._icons as icons +from typing import Optional +from uuid import UUID +from sempy_labs._kql_databases import _resolve_cluster_uri +from sempy_labs._helper_functions import resolve_item_id + + +@log +def query_kusto( + query: str, + kql_database: str | UUID, + workspace: Optional[str | UUID] = None, + language: str = "kql", +) -> pd.DataFrame: + """ + Runs a KQL query against a KQL database. + + Parameters + ---------- + query : str + The query (supports KQL or SQL - make sure to specify the language parameter accordingly). + kql_database : str | uuid.UUID + The KQL database name or ID. + workspace : str | uuid.UUID, default=None + The Fabric workspace name or ID. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + language : str, default="kql" + The language of the query. Currently "kql' and "sql" are supported. + + Returns + ------- + pandas.DataFrame + A pandas dataframe showing the result of the KQL query. + """ + + import notebookutils + + language = language.lower() + if language not in ["kql", "sql"]: + raise ValueError( + f"{icons._red_dot} Invalid language '{language}'. Only 'kql' and 'sql' are supported." 
+ ) + + cluster_uri = _resolve_cluster_uri(kql_database=kql_database, workspace=workspace) + token = notebookutils.credentials.getToken(cluster_uri) + + headers = { + "Authorization": f"Bearer {token}", + "Content-Type": "application/json", + "Accept": "application/json", + } + + kql_database_id = resolve_item_id( + item=kql_database, type="KQLDatabase", workspace=workspace + ) + payload = {"db": kql_database_id, "csl": query} + if language == "sql": + payload["properties"] = {"Options": {"query_language": "sql"}} + + response = requests.post( + f"{cluster_uri}/v1/rest/query", + headers=headers, + json=payload, + ) + + if response.status_code != 200: + raise FabricHTTPException(response) + + results = response.json() + columns_info = results["Tables"][0]["Columns"] + rows = results["Tables"][0]["Rows"] + + df = pd.DataFrame(rows, columns=[col["ColumnName"] for col in columns_info]) + + return df + # for col_info in columns_info: + # col_name = col_info["ColumnName"] + # data_type = col_info["DataType"] + + # try: + # if data_type == "DateTime": + # df[col_name] = pd.to_datetime(df[col_name]) + # elif data_type in ["Int64", "Int32", "Long"]: + # df[col_name] = ( + # pd.to_numeric(df[col_name], errors="coerce") + # .fillna(0) + # .astype("int64") + # ) + # elif data_type == "Real" or data_type == "Double": + # df[col_name] = pd.to_numeric(df[col_name], errors="coerce") + # else: + # # Convert any other type to string, change as needed + # df[col_name] = df[col_name].astype(str) + # except Exception as e: + # print( + # f"{icons.yellow_dot} Could not convert column {col_name} to {data_type}, defaulting to string: {str(e)}" + # ) + # df[col_name] = df[col_name].astype(str) + + return df + + +@log +def query_workspace_monitoring( + query: str, workspace: Optional[str | UUID] = None, language: str = "kql" +) -> pd.DataFrame: + """ + Runs a query against the Fabric workspace monitoring database. Workspace monitoring must be enabled on the workspace to use this function. + + Parameters + ---------- + query : str + The query (supports KQL or SQL - make sure to specify the language parameter accordingly). + workspace : str | uuid.UUID, default=None + The Fabric workspace name or ID. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + language : str, default="kql" + The language of the query. Currently "kql' and "sql" are supported. + + Returns + ------- + pandas.DataFrame + A pandas dataframe showing the result of the query. 
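# --- Hedged usage sketch (not part of the patch) -----------------------------
# Calling the new query_kusto helper defined above. The database, workspace,
# and table names are placeholders; the module path sempy_labs._kusto is taken
# from this diff.
from sempy_labs._kusto import query_kusto

# KQL (the default language); 'MyTable' is a hypothetical table.
df_kql = query_kusto(
    query="MyTable | take 10",
    kql_database="MyKQLDB",
    workspace="Sales Workspace",
)

# The same database queried with T-SQL: language="sql" adds
# {"Options": {"query_language": "sql"}} to the request payload.
df_sql = query_kusto(
    query="SELECT TOP 10 * FROM MyTable",
    kql_database="MyKQLDB",
    workspace="Sales Workspace",
    language="sql",
)
# -----------------------------------------------------------------------------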
+ """ + + return query_kusto( + query=query, + kql_database="Monitoring KQL database", + workspace=workspace, + language=language, + ) diff --git a/src/sempy_labs/_list_functions.py b/src/sempy_labs/_list_functions.py index 70e57c7a..05d8ad38 100644 --- a/src/sempy_labs/_list_functions.py +++ b/src/sempy_labs/_list_functions.py @@ -2,8 +2,6 @@ from sempy_labs._helper_functions import ( resolve_workspace_name_and_id, create_relationship_name, - resolve_lakehouse_id, - resolve_item_type, format_dax_object_name, resolve_dataset_name_and_id, _update_dataframe_datatypes, @@ -43,54 +41,32 @@ def get_object_level_security( from sempy_labs.tom import connect_semantic_model - (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) - (dataset_name, dataset_id) = resolve_dataset_name_and_id(dataset, workspace_id) - columns = { "Role Name": "string", "Object Type": "string", "Table Name": "string", "Object Name": "string", + "Metadata Permission": "string", } df = _create_dataframe(columns=columns) with connect_semantic_model( - dataset=dataset_id, readonly=True, workspace=workspace_id + dataset=dataset, readonly=True, workspace=workspace ) as tom: for r in tom.model.Roles: for tp in r.TablePermissions: - if len(tp.FilterExpression) == 0: - columnCount = 0 - try: - columnCount = len(tp.ColumnPermissions) - except Exception: - pass - objectType = "Table" - if columnCount == 0: - new_data = { - "Role Name": r.Name, - "Object Type": objectType, - "Table Name": tp.Name, - "Object Name": tp.Name, - } - df = pd.concat( - [df, pd.DataFrame(new_data, index=[0])], ignore_index=True - ) - else: - objectType = "Column" - for cp in tp.ColumnPermissions: - new_data = { - "Role Name": r.Name, - "Object Type": objectType, - "Table Name": tp.Name, - "Object Name": cp.Name, - } - df = pd.concat( - [df, pd.DataFrame(new_data, index=[0])], - ignore_index=True, - ) - + for cp in tp.ColumnPermissions: + new_data = { + "Role Name": r.Name, + "Object Type": "Column", + "Table Name": tp.Name, + "Object Name": cp.Name, + "Metadata Permission": cp.Permission, + } + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) return df @@ -242,7 +218,11 @@ def list_tables( "Columns": sum( 1 for c in t.Columns if str(c.Type) != "RowNumber" ), - "% DB": round((total_size / model_size) * 100, 2), + "% DB": ( + round((total_size / model_size) * 100, 2) + if model_size not in (0, None, float("nan")) + else 0.0 + ), } ) @@ -534,7 +514,6 @@ def list_columns( from sempy_labs.directlake._get_directlake_lakehouse import ( get_direct_lake_lakehouse, ) - from pyspark.sql import SparkSession (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) (dataset_name, dataset_id) = resolve_dataset_name_and_id(dataset, workspace_id) @@ -604,61 +583,12 @@ def list_columns( return dfC -def list_dashboards(workspace: Optional[str | UUID] = None) -> pd.DataFrame: - """ - Shows a list of the dashboards within a workspace. - - Parameters - ---------- - workspace : str | uuid.UUID, default=None - The Fabric workspace name or ID. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. - - Returns - ------- - pandas.DataFrame - A pandas dataframe showing the dashboards within a workspace. 
- """ - - columns = { - "Dashboard ID": "string", - "Dashboard Name": "string", - "Read Only": "bool", - "Web URL": "string", - "Embed URL": "string", - "Data Classification": "string", - "Users": "string", - "Subscriptions": "string", - } - df = _create_dataframe(columns=columns) - - (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) - - response = _base_api(request=f"/v1.0/myorg/groups/{workspace_id}/dashboards") - - for v in response.json().get("value", []): - new_data = { - "Dashboard ID": v.get("id"), - "Dashboard Name": v.get("displayName"), - "Read Only": v.get("isReadOnly"), - "Web URL": v.get("webUrl"), - "Embed URL": v.get("embedUrl"), - "Data Classification": v.get("dataClassification"), - "Users": v.get("users"), - "Subscriptions": v.get("subscriptions"), - } - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - - _update_dataframe_datatypes(dataframe=df, column_map=columns) - - return df - - def list_lakehouses(workspace: Optional[str | UUID] = None) -> pd.DataFrame: """ Shows the lakehouses within a workspace. + Service Principal Authentication is supported (see `here `_ for examples). + Parameters ---------- workspace : str | uuid.UUID, default=None @@ -687,7 +617,9 @@ def list_lakehouses(workspace: Optional[str | UUID] = None) -> pd.DataFrame: (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) responses = _base_api( - request=f"/v1/workspaces/{workspace_id}/lakehouses", uses_pagination=True + request=f"/v1/workspaces/{workspace_id}/lakehouses", + uses_pagination=True, + client="fabric_sp", ) for r in responses: @@ -1189,11 +1121,15 @@ def list_semantic_model_objects( def list_shortcuts( - lakehouse: Optional[str] = None, workspace: Optional[str | UUID] = None + lakehouse: Optional[str] = None, + workspace: Optional[str | UUID] = None, + path: Optional[str] = None, ) -> pd.DataFrame: """ Shows all shortcuts which exist in a Fabric lakehouse and their properties. + *** NOTE: This function has been moved to the lakehouse subpackage. Please repoint your code to use that location. *** + Parameters ---------- lakehouse : str, default=None @@ -1203,6 +1139,9 @@ def list_shortcuts( The name or ID of the Fabric workspace in which lakehouse resides. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. + path: str, default=None + The path within lakehouse where to look for shortcuts. If provied, must start with either "Files" or "Tables". Examples: Tables/FolderName/SubFolderName; Files/FolderName/SubFolderName. + Defaults to None which will retun all shortcuts on the given lakehouse Returns ------- @@ -1210,126 +1149,13 @@ def list_shortcuts( A pandas dataframe showing all the shortcuts which exist in the specified lakehouse. 
""" - (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) - - if lakehouse is None: - lakehouse_id = fabric.get_lakehouse_id() - else: - lakehouse_id = resolve_lakehouse_id(lakehouse, workspace_id) - - columns = { - "Shortcut Name": "string", - "Shortcut Path": "string", - "Source Type": "string", - "Source Workspace Id": "string", - "Source Workspace Name": "string", - "Source Item Id": "string", - "Source Item Name": "string", - "Source Item Type": "string", - "OneLake Path": "string", - "Connection Id": "string", - "Location": "string", - "Bucket": "string", - "SubPath": "string", - } - df = _create_dataframe(columns=columns) + from sempy_labs.lakehouse._shortcuts import list_shortcuts - responses = _base_api( - request=f"/v1/workspaces/{workspace_id}/items/{lakehouse_id}/shortcuts", - uses_pagination=True, + print( + f"{icons.warning} This function has been moved to the lakehouse subpackage. Please repoint your code to use that location." ) - sources = ["s3Compatible", "googleCloudStorage", "externalDataShare", "amazonS3", "adlsGen2", "dataverse"] - sources_locpath = ["s3Compatible", "googleCloudStorage", "amazonS3", "adlsGen2"] - - for r in responses: - for i in r.get("value", []): - tgt = i.get("target", {}) - one_lake = tgt.get("oneLake", {}) - connection_id = next( - (tgt.get(source, {}).get("connectionId") for source in sources if tgt.get(source)), - None - ) - location = next( - (tgt.get(source, {}).get("location") for source in sources_locpath if tgt.get(source)), - None - ) - sub_path = next( - (tgt.get(source, {}).get("subpath") for source in sources_locpath if tgt.get(source)), - None - ) - source_workspace_id = one_lake.get("workspaceId") - source_item_id = one_lake.get("itemId") - source_workspace_name = ( - fabric.resolve_workspace_name(source_workspace_id) - if source_workspace_id is not None - else None - ) - - new_data = { - "Shortcut Name": i.get("name"), - "Shortcut Path": i.get("path"), - "Source Type": tgt.get("type"), - "Source Workspace Id": source_workspace_id, - "Source Workspace Name": source_workspace_name, - "Source Item Id": source_item_id, - "Source Item Name": ( - fabric.resolve_item_name( - source_item_id, workspace=source_workspace_name - ) - if source_item_id is not None - else None - ), - "Source Item Type": ( - resolve_item_type(source_item_id, workspace=source_workspace_name) - if source_item_id is not None - else None - ), - "OneLake Path": one_lake.get("path"), - "Connection Id": connection_id, - "Location": location, - "Bucket": tgt.get("s3Compatible", {}).get("bucket"), - "SubPath": sub_path, - } - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - - return df - - -def list_capacities() -> pd.DataFrame: - """ - Shows the capacities and their properties. 
- - Returns - ------- - pandas.DataFrame - A pandas dataframe showing the capacities and their properties - """ - - columns = { - "Id": "string", - "Display Name": "string", - "Sku": "string", - "Region": "string", - "State": "string", - "Admins": "string", - } - df = _create_dataframe(columns=columns) - - response = _base_api(request="/v1.0/myorg/capacities") - - for i in response.json().get("value", []): - new_data = { - "Id": i.get("id").lower(), - "Display Name": i.get("displayName"), - "Sku": i.get("sku"), - "Region": i.get("region"), - "State": i.get("state"), - "Admins": [i.get("admins", [])], - } - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - - return df + return list_shortcuts(lakehouse=lakehouse, workspace=workspace, path=path) def list_reports_using_semantic_model( @@ -1371,7 +1197,7 @@ def list_reports_using_semantic_model( & (dfR["Dataset Workspace Id"] == workspace_id) ][["Name", "Id"]] dfR_filt.rename(columns={"Name": "Report Name", "Id": "Report Id"}, inplace=True) - dfR_filt["Report Worskpace Name"] = workspace_name + dfR_filt["Report Workspace Name"] = workspace_name dfR_filt["Report Workspace Id"] = workspace_id return dfR_filt @@ -1632,7 +1458,9 @@ def list_server_properties(workspace: Optional[str | UUID] = None) -> pd.DataFra A pandas dataframe showing a list of the server properties. """ - tom_server = fabric.create_tom_server(readonly=True, workspace=workspace) + tom_server = fabric.create_tom_server( + dataset=None, readonly=True, workspace=workspace + ) rows = [ { diff --git a/src/sempy_labs/_managed_private_endpoints.py b/src/sempy_labs/_managed_private_endpoints.py index d6b34087..dbe1d3ab 100644 --- a/src/sempy_labs/_managed_private_endpoints.py +++ b/src/sempy_labs/_managed_private_endpoints.py @@ -23,6 +23,8 @@ def create_managed_private_endpoint( This is a wrapper function for the following API: `Managed Private Endpoints - Create Workspace Managed Private Endpoint `. + Service Principal Authentication is supported (see `here `_ for examples). + Parameters ---------- name: str @@ -60,6 +62,7 @@ def create_managed_private_endpoint( status_codes=[201, 202], payload=request_body, lro_return_status_code=True, + client="fabric_sp", ) _print_success( item_name=name, @@ -77,6 +80,8 @@ def list_managed_private_endpoints( This is a wrapper function for the following API: `Managed Private Endpoints - List Workspace Managed Private Endpoints `. + Service Principal Authentication is supported (see `here `_ for examples). + Parameters ---------- workspace : str | uuid.UUID, default=None @@ -106,7 +111,7 @@ def list_managed_private_endpoints( responses = _base_api( request=f"/v1/workspaces/{workspace_id}/managedPrivateEndpoints", uses_pagination=True, - status_codes=200, + client="fabric_sp", ) for r in responses: @@ -134,6 +139,8 @@ def delete_managed_private_endpoint( This is a wrapper function for the following API: `Managed Private Endpoints - Delete Workspace Managed Private Endpoint `. + Service Principal Authentication is supported (see `here `_ for examples). 
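# --- Hedged usage sketch (not part of the patch) -----------------------------
# Recommended call site after the relocation above: import list_shortcuts from
# the lakehouse subpackage and optionally scope it with the new 'path'
# parameter. Lakehouse/workspace names are placeholders; the private module
# path is the one this wrapper itself imports from, and a public re-export from
# sempy_labs.lakehouse is assumed rather than shown in this hunk.
from sempy_labs.lakehouse._shortcuts import list_shortcuts

# All shortcuts in the lakehouse.
df_all = list_shortcuts(lakehouse="MyLakehouse", workspace="Sales Workspace")

# Only shortcuts under a specific Tables subfolder.
df_scoped = list_shortcuts(
    lakehouse="MyLakehouse",
    workspace="Sales Workspace",
    path="Tables/FolderName/SubFolderName",
)
# -----------------------------------------------------------------------------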
+ Parameters ---------- managed_private_endpoint: str | uuid.UUID @@ -162,7 +169,7 @@ def delete_managed_private_endpoint( _base_api( request=f"/v1/workspaces/{workspace_id}/managedPrivateEndpoints/{item_id}", method="delete", - status_codes=200, + client="fabric_sp", ) _print_success( diff --git a/src/sempy_labs/_mirrored_databases.py b/src/sempy_labs/_mirrored_databases.py index b2c3bac9..ae6a83ac 100644 --- a/src/sempy_labs/_mirrored_databases.py +++ b/src/sempy_labs/_mirrored_databases.py @@ -1,4 +1,3 @@ -import sempy.fabric as fabric import pandas as pd from typing import Optional from sempy_labs._helper_functions import ( @@ -6,9 +5,11 @@ _decode_b64, _update_dataframe_datatypes, _base_api, - _print_success, resolve_item_id, _create_dataframe, + delete_item, + create_item, + get_item_definition, ) import sempy_labs._icons as icons import base64 @@ -21,6 +22,8 @@ def list_mirrored_databases(workspace: Optional[str | UUID] = None) -> pd.DataFr This is a wrapper function for the following API: `Items - List Mirrored Databases `_. + Service Principal Authentication is supported (see `here `_ for examples). + Parameters ---------- workspace : str | uuid.UUID, default=None @@ -49,8 +52,8 @@ def list_mirrored_databases(workspace: Optional[str | UUID] = None) -> pd.DataFr (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) responses = _base_api( request=f"/v1/workspaces/{workspace_id}/mirroredDatabases", - status_codes=200, uses_pagination=True, + client="fabric_sp", ) for r in responses: @@ -92,21 +95,8 @@ def create_mirrored_database( or if no lakehouse attached, resolves to the workspace of the notebook. """ - (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) - - payload = {"displayName": name} - - if description: - payload["description"] = description - - _base_api( - request=f"/v1/workspaces/{workspace_id}/mirroredDatabases", - status_codes=201, - method="post", - payload=payload, - ) - _print_success( - item_name=name, item_type="mirrored database", workspace_name=workspace_name + create_item( + name=name, description=description, type="MirroredDatabase", workspace=workspace ) @@ -128,15 +118,7 @@ def delete_mirrored_database( or if no lakehouse attached, resolves to the workspace of the notebook. """ - item_id = resolve_item_id( - item=mirrored_database, type="MirroredDatabase", workspace=workspace - ) - fabric.delete_item(item_id=item_id, workspace=workspace) - _print_success( - item_name=mirrored_database, - item_type="mirrored database", - workspace_name=workspace, - ) + delete_item(item=mirrored_database, type="MirroredDatabase", workspace=workspace) def get_mirroring_status( @@ -307,7 +289,7 @@ def get_mirrored_database_definition( mirrored_database: str | UUID, workspace: Optional[str | UUID] = None, decode: bool = True, -) -> str: +) -> dict: """ Obtains the mirrored database definition. @@ -327,31 +309,17 @@ def get_mirrored_database_definition( Returns ------- - str + dict The mirrored database definition. 
""" - (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) - item_id = resolve_item_id( - item=mirrored_database, type="MirroredDatabase", workspace=workspace + return get_item_definition( + item=mirrored_database, + type="MirroredDatabase", + workspace=workspace, + return_dataframe=False, + decode=decode, ) - result = _base_api( - request=f"/v1/workspaces/{workspace_id}/mirroredDatabases/{item_id}/getDefinition", - method="post", - status_codes=200, - lro_return_json=True, - ) - - df_items = pd.json_normalize(result["definition"]["parts"]) - df_items_filt = df_items[df_items["path"] == "mirroredDatabase.json"] - payload = df_items_filt["payload"].iloc[0] - - if decode: - result = _decode_b64(payload) - else: - result = payload - - return result def update_mirrored_database_definition( diff --git a/src/sempy_labs/_ml_experiments.py b/src/sempy_labs/_ml_experiments.py index cff5655d..8c9baaad 100644 --- a/src/sempy_labs/_ml_experiments.py +++ b/src/sempy_labs/_ml_experiments.py @@ -1,12 +1,11 @@ -import sempy.fabric as fabric import pandas as pd from typing import Optional from sempy_labs._helper_functions import ( resolve_workspace_name_and_id, _base_api, - _print_success, - resolve_item_id, + delete_item, _create_dataframe, + create_item, ) from uuid import UUID @@ -74,32 +73,15 @@ def create_ml_experiment( name: str Name of the ML experiment. description : str, default=None - A description of the environment. + A description of the ML experiment. workspace : str | uuid.UUID, default=None The Fabric workspace name or ID. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. """ - (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) - - payload = {"displayName": name} - - if description: - payload["description"] = description - - _base_api( - request=f"/v1/workspaces/{workspace_id}/mlExperiments", - method="post", - payload=payload, - status_codes=[201, 202], - lro_return_status_code=True, - ) - _print_success( - item_name=name, - item_type="ML experiment", - workspace_name=workspace_name, - action="created", + create_item( + name=name, description=description, type="MLExperiment", workspace=workspace ) @@ -119,11 +101,4 @@ def delete_ml_experiment(name: str, workspace: Optional[str | UUID] = None): or if no lakehouse attached, resolves to the workspace of the notebook. """ - item_id = resolve_item_id(item=name, type="MLExperiment", workspace=workspace) - fabric.delete_item(item_id=item_id, workspace=workspace) - _print_success( - item_name=name, - item_type="ML Experiment", - workspace_name=workspace, - action="deleted", - ) + delete_item(item=name, type="MLExperiment", workspace=workspace) diff --git a/src/sempy_labs/_ml_models.py b/src/sempy_labs/_ml_models.py index d1447c34..92bc9e2d 100644 --- a/src/sempy_labs/_ml_models.py +++ b/src/sempy_labs/_ml_models.py @@ -1,12 +1,11 @@ -import sempy.fabric as fabric import pandas as pd from typing import Optional from sempy_labs._helper_functions import ( resolve_workspace_name_and_id, _base_api, - resolve_item_id, - _print_success, + delete_item, _create_dataframe, + create_item, ) from uuid import UUID @@ -81,26 +80,7 @@ def create_ml_model( or if no lakehouse attached, resolves to the workspace of the notebook. 
""" - (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) - - payload = {"displayName": name} - - if description: - payload["description"] = description - - _base_api( - request=f"/v1/workspaces/{workspace_id}/mlModels", - method="post", - status_codes=[201, 202], - payload=payload, - lro_return_status_code=True, - ) - _print_success( - item_name=name, - item_type="ML Model", - workspace_name=workspace_name, - action="created", - ) + create_item(name=name, description=description, type="MLModel", workspace=workspace) def delete_ml_model(name: str | UUID, workspace: Optional[str | UUID] = None): @@ -119,8 +99,4 @@ def delete_ml_model(name: str | UUID, workspace: Optional[str | UUID] = None): or if no lakehouse attached, resolves to the workspace of the notebook. """ - item_id = resolve_item_id(item=name, type="MLModel", workspace=workspace) - fabric.delete_item(item_id=item_id, workspace=workspace) - _print_success( - item_name=name, item_type="ML Model", workspace_name=workspace, action="deleted" - ) + delete_item(item=name, type="MLModel", workspace=workspace) diff --git a/src/sempy_labs/_model_bpa.py b/src/sempy_labs/_model_bpa.py index ac94f719..abec4a98 100644 --- a/src/sempy_labs/_model_bpa.py +++ b/src/sempy_labs/_model_bpa.py @@ -6,7 +6,6 @@ from sempy_labs._model_dependencies import get_model_calc_dependencies from sempy_labs._helper_functions import ( format_dax_object_name, - resolve_lakehouse_name, create_relationship_name, save_as_delta_table, resolve_workspace_capacity, @@ -43,6 +42,8 @@ def run_model_bpa( """ Displays an HTML visualization of the results of the Best Practice Analyzer scan for a semantic model. + The Best Practice Analyzer rules are based on the rules defined `here `_. The framework for the Best Practice Analyzer and rules are based on the foundation set by `Tabular Editor `_. 
+ Parameters ---------- dataset : str | uuid.UUID @@ -274,12 +275,17 @@ def translate_using_spark(rule_file): tom.all_columns(), lambda obj: format_dax_object_name(obj.Parent.Name, obj.Name), ), + "Calculated Column": ( + tom.all_calculated_columns(), + lambda obj: format_dax_object_name(obj.Parent.Name, obj.Name), + ), "Measure": (tom.all_measures(), lambda obj: obj.Name), "Hierarchy": ( tom.all_hierarchies(), lambda obj: format_dax_object_name(obj.Parent.Name, obj.Name), ), "Table": (tom.model.Tables, lambda obj: obj.Name), + "Calculated Table": (tom.all_calculated_tables(), lambda obj: obj.Name), "Role": (tom.model.Roles, lambda obj: obj.Name), "Model": (tom.model, lambda obj: obj.Model.Name), "Calculation Item": ( @@ -322,6 +328,10 @@ def translate_using_spark(rule_file): x = [nm(obj) for obj in tom.all_hierarchies() if expr(obj, tom)] elif scope == "Table": x = [nm(obj) for obj in tom.model.Tables if expr(obj, tom)] + elif scope == "Calculated Table": + x = [ + nm(obj) for obj in tom.all_calculated_tables() if expr(obj, tom) + ] elif scope == "Relationship": x = [nm(obj) for obj in tom.model.Relationships if expr(obj, tom)] elif scope == "Role": @@ -332,6 +342,12 @@ def translate_using_spark(rule_file): x = [ nm(obj) for obj in tom.all_calculation_items() if expr(obj, tom) ] + elif scope == "Calculated Column": + x = [ + nm(obj) + for obj in tom.all_calculated_columns() + if expr(obj, tom) + ] if len(x) > 0: new_data = { @@ -372,13 +388,7 @@ def translate_using_spark(rule_file): dfExport = finalDF.copy() delta_table_name = "modelbparesults" - lakehouse_id = fabric.get_lakehouse_id() - lake_workspace = fabric.get_workspace_id() - lakehouse = resolve_lakehouse_name( - lakehouse_id=lakehouse_id, workspace=lake_workspace - ) - - lakeT = get_lakehouse_tables(lakehouse=lakehouse, workspace=lake_workspace) + lakeT = get_lakehouse_tables() lakeT_filt = lakeT[lakeT["Table Name"] == delta_table_name] dfExport["Severity"].replace(icons.severity_mapping, inplace=True) @@ -386,9 +396,7 @@ def translate_using_spark(rule_file): if len(lakeT_filt) == 0: runId = 1 else: - max_run_id = _get_column_aggregate( - lakehouse=lakehouse, table_name=delta_table_name - ) + max_run_id = _get_column_aggregate(table_name=delta_table_name) runId = max_run_id + 1 now = datetime.datetime.now() diff --git a/src/sempy_labs/_model_bpa_bulk.py b/src/sempy_labs/_model_bpa_bulk.py index 5d2e8a4c..70a8a5dd 100644 --- a/src/sempy_labs/_model_bpa_bulk.py +++ b/src/sempy_labs/_model_bpa_bulk.py @@ -2,11 +2,12 @@ import pandas as pd import datetime from sempy_labs._helper_functions import ( - resolve_lakehouse_name, save_as_delta_table, resolve_workspace_capacity, retry, _get_column_aggregate, + resolve_workspace_id, + resolve_lakehouse_name_and_id, ) from sempy_labs.lakehouse import ( get_lakehouse_tables, @@ -16,6 +17,7 @@ from typing import Optional, List from sempy._utils._log import log import sempy_labs._icons as icons +from uuid import UUID @log @@ -23,7 +25,7 @@ def run_model_bpa_bulk( rules: Optional[pd.DataFrame] = None, extended: bool = False, language: Optional[str] = None, - workspace: Optional[str | List[str]] = None, + workspace: Optional[str | UUID | List[str | UUID]] = None, skip_models: Optional[str | List[str]] = ["ModelBPA", "Fabric Capacity Metrics"], skip_models_in_workspace: Optional[dict] = None, ): @@ -41,8 +43,8 @@ def run_model_bpa_bulk( language : str, default=None The language (code) in which the rules will appear. 
For example, specifying 'it-IT' will show the Rule Name, Category and Description in Italian. Defaults to None which resolves to English. - workspace : str | List[str], default=None - The workspace or list of workspaces to scan. + workspace : str | uuid.UUID | List[str | uuid.UUID], default=None + The workspace or list of workspaces to scan. Supports both the workspace name and the workspace id. Defaults to None which scans all accessible workspaces. skip_models : str | List[str], default=['ModelBPA', 'Fabric Capacity Metrics'] The semantic models to always skip when running this analysis. @@ -66,17 +68,12 @@ def run_model_bpa_bulk( now = datetime.datetime.now() output_table = "modelbparesults" - lakehouse_workspace = fabric.resolve_workspace_name() - lakehouse_id = fabric.get_lakehouse_id() - lakehouse = resolve_lakehouse_name( - lakehouse_id=lakehouse_id, workspace=lakehouse_workspace - ) - lakeT = get_lakehouse_tables(lakehouse=lakehouse, workspace=lakehouse_workspace) + lakeT = get_lakehouse_tables() lakeT_filt = lakeT[lakeT["Table Name"] == output_table] - if len(lakeT_filt) == 0: + if lakeT_filt.empty: runId = 1 else: - max_run_id = _get_column_aggregate(lakehouse=lakehouse, table_name=output_table) + max_run_id = _get_column_aggregate(table_name=output_table) runId = max_run_id + 1 if isinstance(workspace, str): @@ -86,14 +83,14 @@ def run_model_bpa_bulk( if workspace is None: dfW_filt = dfW.copy() else: - dfW_filt = dfW[dfW["Name"].isin(workspace)] + dfW_filt = dfW[(dfW["Name"].isin(workspace)) | (dfW["Id"].isin(workspace))] - if len(dfW_filt) == 0: + if dfW_filt.empty: raise ValueError( f"{icons.red_dot} There are no valid workspaces to assess. This is likely due to not having proper permissions to the workspace(s) entered in the 'workspace' parameter." ) - for i, r in dfW_filt.iterrows(): + for _, r in dfW_filt.iterrows(): wksp = r["Name"] wksp_id = r["Id"] capacity_id, capacity_name = resolve_workspace_capacity(workspace=wksp) @@ -108,7 +105,7 @@ def run_model_bpa_bulk( dfD = dfD[~dfD["Dataset Name"].isin(skip_models_wkspc)] # Exclude default semantic models - if len(dfD) > 0: + if not dfD.empty: dfI = fabric.list_items(workspace=wksp) filtered_df = dfI.groupby("Display Name").filter( lambda x: set(["Warehouse", "SemanticModel"]).issubset(set(x["Type"])) @@ -118,7 +115,7 @@ def run_model_bpa_bulk( skip_models.extend(default_semantic_models) dfD_filt = dfD[~dfD["Dataset Name"].isin(skip_models)] - if len(dfD_filt) > 0: + if not dfD_filt.empty: for _, r2 in dfD_filt.iterrows(): dataset_id = r2["Dataset Id"] dataset_name = r2["Dataset Name"] @@ -161,7 +158,7 @@ def run_model_bpa_bulk( ) print(e) - if len(df) == 0: + if df.empty: print( f"{icons.yellow_dot} No BPA results to save for the '{wksp}' workspace." ) @@ -170,7 +167,7 @@ def run_model_bpa_bulk( # Append save results individually for each workspace (so as not to create a giant dataframe) print( - f"{icons.in_progress} Saving the Model BPA results of the '{wksp}' workspace to the '{output_table}' within the '{lakehouse}' lakehouse within the '{lakehouse_workspace}' workspace..." + f"{icons.in_progress} Saving the Model BPA results of the '{wksp}' workspace to the '{output_table}' within the lakehouse attached to this notebook..." 
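# --- Hedged usage sketch (not part of the patch) -----------------------------
# The widened 'workspace' parameter of run_model_bpa_bulk: names and IDs can be
# mixed in one list because the function now filters on both the Name and Id
# columns of the workspace list. All values below are placeholders.
from sempy_labs._model_bpa_bulk import run_model_bpa_bulk

run_model_bpa_bulk(
    workspace=[
        "Sales Workspace",                        # by name
        "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee",   # by ID (placeholder GUID)
    ],
    skip_models=["ModelBPA", "Fabric Capacity Metrics"],
)
# -----------------------------------------------------------------------------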
) schema = { @@ -195,8 +192,8 @@ def run_model_bpa_bulk( @log def create_model_bpa_semantic_model( dataset: Optional[str] = icons.model_bpa_name, - lakehouse: Optional[str] = None, - lakehouse_workspace: Optional[str] = None, + lakehouse: Optional[str | UUID] = None, + lakehouse_workspace: Optional[str | UUID] = None, ): """ Dynamically generates a Direct Lake semantic model based on the 'modelbparesults' delta table which contains the Best Practice Analyzer results. @@ -209,16 +206,15 @@ def create_model_bpa_semantic_model( ---------- dataset : str, default='ModelBPA' Name of the semantic model to be created. - lakehouse : str, default=None + lakehouse : str | uuid.UUID, default=None Name of the Fabric lakehouse which contains the 'modelbparesults' delta table. Defaults to None which resolves to the default lakehouse attached to the notebook. - lakehouse_workspace : str, default=None + lakehouse_workspace : str | uuid.UUID, default=None The workspace in which the lakehouse resides. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. """ - from sempy_labs._helper_functions import resolve_lakehouse_name from sempy_labs.directlake import ( generate_shared_expression, add_table_to_direct_lake_semantic_model, @@ -226,22 +222,21 @@ def create_model_bpa_semantic_model( from sempy_labs import create_blank_semantic_model, refresh_semantic_model from sempy_labs.tom import connect_semantic_model - lakehouse_workspace = fabric.resolve_workspace_name(lakehouse_workspace) - - if lakehouse is None: - lakehouse_id = fabric.get_lakehouse_id() - lakehouse = resolve_lakehouse_name( - lakehouse_id=lakehouse_id, workspace=lakehouse_workspace - ) + lakehouse_workspace_id = resolve_workspace_id(workspace=lakehouse_workspace) + (lakehouse_id, lakehouse_name) = resolve_lakehouse_name_and_id( + lakehouse=lakehouse, workspace=lakehouse_workspace_id + ) # Generate the shared expression based on the lakehouse and lakehouse workspace expr = generate_shared_expression( - item_name=lakehouse, item_type="Lakehouse", workspace=lakehouse_workspace + item_name=lakehouse_name, + item_type="Lakehouse", + workspace=lakehouse_workspace_id, ) # Create blank model create_blank_semantic_model( - dataset=dataset, workspace=lakehouse_workspace, overwrite=True + dataset=dataset, workspace=lakehouse_workspace_id, overwrite=True ) @retry( @@ -250,7 +245,7 @@ def create_model_bpa_semantic_model( ) def dyn_connect(): with connect_semantic_model( - dataset=dataset, readonly=True, workspace=lakehouse_workspace + dataset=dataset, readonly=True, workspace=lakehouse_workspace_id ) as tom: tom.model @@ -259,7 +254,7 @@ def dyn_connect(): icons.sll_tags.append("ModelBPABulk") table_exists = False with connect_semantic_model( - dataset=dataset, readonly=False, workspace=lakehouse_workspace + dataset=dataset, readonly=False, workspace=lakehouse_workspace_id ) as tom: t_name = "BPAResults" t_name_full = f"'{t_name}'" @@ -274,11 +269,11 @@ def dyn_connect(): dataset=dataset, table_name=t_name, lakehouse_table_name="modelbparesults", - workspace=lakehouse_workspace, + workspace=lakehouse_workspace_id, refresh=False, ) with connect_semantic_model( - dataset=dataset, readonly=False, workspace=lakehouse_workspace + dataset=dataset, readonly=False, workspace=lakehouse_workspace_id ) as tom: # Fix column names for c in tom.all_columns(): @@ -377,4 +372,4 @@ def get_expr(table_name, calculation): # tom.add_measure(table_name=t_name, measure_name='Rules 
Followed', expression="[Rules] - [Rules Violated]") # Refresh the model - refresh_semantic_model(dataset=dataset, workspace=lakehouse_workspace) + refresh_semantic_model(dataset=dataset, workspace=lakehouse_workspace_id) diff --git a/src/sempy_labs/_model_bpa_rules.py b/src/sempy_labs/_model_bpa_rules.py index f5295fdb..01359f89 100644 --- a/src/sempy_labs/_model_bpa_rules.py +++ b/src/sempy_labs/_model_bpa_rules.py @@ -556,7 +556,7 @@ def model_bpa_rules( "Warning", "Use the DIVIDE function for division", lambda obj, tom: re.search( - r"\]\s*\/(?!\/)(?!\*)\" or \"\)\s*\/(?!\/)(?!\*)", + r"\]\s*\/(?!\/)(?!\*)|\)\s*\/(?!\/)(?!\*)", obj.Expression, flags=re.IGNORECASE, ), @@ -565,7 +565,12 @@ def model_bpa_rules( ), ( "DAX Expressions", - "Measure", + [ + "Measure", + "Calculated Table", + "Calculated Column", + "Calculation Item", + ], "Error", "Column references should be fully qualified", lambda obj, tom: any( @@ -576,7 +581,12 @@ def model_bpa_rules( ), ( "DAX Expressions", - "Measure", + [ + "Measure", + "Calculated Table", + "Calculated Column", + "Calculation Item", + ], "Error", "Measure references should be unqualified", lambda obj, tom: any( @@ -664,8 +674,18 @@ def model_bpa_rules( "Provide format string for 'Date' columns", lambda obj, tom: (re.search(r"date", obj.Name, flags=re.IGNORECASE)) and (obj.DataType == TOM.DataType.DateTime) - and (obj.FormatString != "mm/dd/yyyy"), - 'Columns of type "DateTime" that have "Month" in their names should be formatted as "mm/dd/yyyy".', + and ( + obj.FormatString.lower() + not in [ + "mm/dd/yyyy", + "mm-dd-yyyy", + "dd/mm/yyyy", + "dd-mm-yyyy", + "yyyy-mm-dd", + "yyyy/mm/dd", + ] + ), + 'Columns of type "DateTime" that have "Date" in their names should be formatted.', ), ( "Formatting", @@ -779,7 +799,7 @@ def model_bpa_rules( "Formatting", "Column", "Warning", - 'Provide format string for "Month" columns', + "Provide format string for 'Month' columns", lambda obj, tom: re.search(r"month", obj.Name, flags=re.IGNORECASE) and obj.DataType == TOM.DataType.DateTime and obj.FormatString != "MMMM yyyy", diff --git a/src/sempy_labs/_mounted_data_factories.py b/src/sempy_labs/_mounted_data_factories.py new file mode 100644 index 00000000..839d0dc6 --- /dev/null +++ b/src/sempy_labs/_mounted_data_factories.py @@ -0,0 +1,119 @@ +import pandas as pd +import json +from typing import Optional +from sempy_labs._helper_functions import ( + resolve_workspace_name_and_id, + _base_api, + _create_dataframe, + _update_dataframe_datatypes, + resolve_item_id, + _decode_b64, + delete_item, + get_item_definition, +) + +from uuid import UUID + + +def list_mounted_data_factories( + workspace: Optional[str | UUID] = None, +) -> pd.DataFrame: + """ + Shows a list of mounted data factories from the specified workspace. + + This is a wrapper function for the following API: `Items - List Mounted Data Factories `_. + + Parameters + ---------- + workspace : str | uuid.UUID, default=None + The Fabric workspace name or ID. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + + Returns + ------- + pandas.DataFrame + A pandas dataframe showing a list of mounted data factories from the specified workspace. 
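# --- Hedged sketch (not part of the patch) -----------------------------------
# What the corrected DIVIDE-rule regex above matches: a "/" that follows "]" or
# ")" and does not start "//" or "/*". The DAX snippets are illustrative only.
import re

pattern = r"\]\s*\/(?!\/)(?!\*)|\)\s*\/(?!\/)(?!\*)"

flagged = "[Sales Amount] / [Order Count]"       # plain division -> flagged
comment = "[Sales Amount] // per-order comment"  # line comment, not division
divide_fn = "DIVIDE([Sales Amount], [Order Count])"

print(bool(re.search(pattern, flagged, flags=re.IGNORECASE)))    # True
print(bool(re.search(pattern, comment, flags=re.IGNORECASE)))    # False
print(bool(re.search(pattern, divide_fn, flags=re.IGNORECASE)))  # False
# -----------------------------------------------------------------------------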
+ """ + + (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) + + columns = { + "Mounted Data Factory Name": "str", + "Mounted Data Factory Id": "str", + "Description": "str", + } + + df = _create_dataframe(columns=columns) + responses = _base_api( + request=f"/v1/workspaces/{workspace_id}/mountedDataFactories", + uses_pagination=True, + ) + + for r in responses: + for v in r.get("value", []): + new_data = { + "Mounted Data Factory Name": v.get("displayName"), + "Mounted Data Factory Id": v.get("id"), + "Description": v.get("description"), + } + + df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + + _update_dataframe_datatypes(dataframe=df, column_map=columns) + + return df + + +def get_mounted_data_factory_definition( + mounted_data_factory: str | UUID, workspace: Optional[str | UUID] = None +) -> dict: + """ + Returns the specified MountedDataFactory public definition. + + This is a wrapper function for the following API: `Items - Get Mounted Data Factory Definition `_. + + Parameters + ---------- + mounted_data_factory : str | uuid.UUID + The name or ID of the mounted data factory. + workspace : str | uuid.UUID, default=None + The Fabric workspace name or ID. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + + Returns + ------- + dict + The 'mountedDataFactory-content.json' file from the mounted data factory definition. + """ + + return get_item_definition( + item=mounted_data_factory, + type="MountedDataFactory", + workspace=workspace, + return_dataframe=False, + ) + + +def delete_mounted_data_factory( + mounted_data_factory: str | UUID, workspace: Optional[str | UUID] +): + """ + Deletes the specified mounted data factory. + + This is a wrapper function for the following API: `Items - Delete Mounted Data Factory `_. + + Parameters + ---------- + mounted_data_factory : str | uuid.UUID + The name or ID of the mounted data factory. + workspace : str | uuid.UUID, default=None + The Fabric workspace name or ID. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. 
+ """ + + delete_item( + item=mounted_data_factory, type="MountedDataFactory", workspace=workspace + ) diff --git a/src/sempy_labs/_notebooks.py b/src/sempy_labs/_notebooks.py index 0af5ffee..3e16dc16 100644 --- a/src/sempy_labs/_notebooks.py +++ b/src/sempy_labs/_notebooks.py @@ -7,9 +7,11 @@ from sempy._utils._log import log from sempy_labs._helper_functions import ( resolve_workspace_name_and_id, + resolve_workspace_id, _decode_b64, _base_api, resolve_item_id, + create_item, ) from sempy.fabric.exceptions import FabricHTTPException import os @@ -19,13 +21,20 @@ def _get_notebook_definition_base( - notebook_name: str, workspace: Optional[str | UUID] = None + notebook_name: str, + workspace: Optional[str | UUID] = None, + format: Optional[str] = None, ) -> pd.DataFrame: - (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) + workspace_id = resolve_workspace_id(workspace) item_id = resolve_item_id(item=notebook_name, type="Notebook", workspace=workspace) + + url = f"v1/workspaces/{workspace_id}/notebooks/{item_id}/getDefinition" + if format == "ipynb": + url += f"?format={format}" + result = _base_api( - request=f"v1/workspaces/{workspace_id}/notebooks/{item_id}/getDefinition", + request=url, method="post", lro_return_json=True, status_codes=None, @@ -52,7 +61,10 @@ def _get_notebook_type( def get_notebook_definition( - notebook_name: str, workspace: Optional[str | UUID] = None, decode: bool = True + notebook_name: str, + workspace: Optional[str | UUID] = None, + decode: bool = True, + format: Optional[str] = None, ) -> str: """ Obtains the notebook definition. @@ -70,6 +82,9 @@ def get_notebook_definition( decode : bool, default=True If True, decodes the notebook definition file into .ipynb format. If False, obtains the notebook definition file in base64 format. + format : str, default=None + The only supported value is ipynb + If provided the format will be in standard .ipynb otherwise the format will be in source code format which is GIT friendly ipynb Returns ------- @@ -78,7 +93,7 @@ def get_notebook_definition( """ df_items = _get_notebook_definition_base( - notebook_name=notebook_name, workspace=workspace + notebook_name=notebook_name, workspace=workspace, format=format ) df_items_filt = df_items[df_items["path"].str.startswith(_notebook_prefix)] payload = df_items_filt["payload"].iloc[0] @@ -144,6 +159,7 @@ def import_notebook_from_web( notebook_content=response.content, workspace=workspace_id, description=description, + format="ipynb", ) elif len(dfI_filt) > 0 and overwrite: print(f"{icons.info} Overwrite of notebooks is currently not supported.") @@ -162,6 +178,7 @@ def create_notebook( type: str = "py", description: Optional[str] = None, workspace: Optional[str | UUID] = None, + format: Optional[str] = None, ): """ Creates a new notebook with a definition within a workspace. @@ -181,42 +198,40 @@ def create_notebook( The name or ID of the workspace. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. 
+ format : str, default=None + If 'ipynb' is provided than notebook_content should be standard ipynb format + otherwise notebook_content should be GIT friendly format """ - (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) notebook_payload = base64.b64encode(notebook_content).decode("utf-8") - payload = { - "displayName": name, - "definition": { - "format": "ipynb", - "parts": [ - { - "path": f"{_notebook_prefix}.{type}", - "payload": notebook_payload, - "payloadType": "InlineBase64", - } - ], - }, + definition_payload = { + "parts": [ + { + "path": f"{_notebook_prefix}{type}", + "payload": notebook_payload, + "payloadType": "InlineBase64", + } + ], } - if description is not None: - payload["description"] = description - _base_api( - request=f"v1/workspaces/{workspace_id}/notebooks", - payload=payload, - method="post", - lro_return_status_code=True, - status_codes=[201, 202], - ) + if format == "ipynb": + definition_payload["format"] = "ipynb" - print( - f"{icons.green_dot} The '{name}' notebook was created within the '{workspace_name}' workspace." + create_item( + name=name, + type="Notebook", + workspace=workspace, + description=description, + definition=definition_payload, ) def update_notebook_definition( - name: str, notebook_content: str, workspace: Optional[str | UUID] = None + name: str, + notebook_content: str, + workspace: Optional[str | UUID] = None, + format: Optional[str] = None, ): """ Updates an existing notebook with a new definition. @@ -231,10 +246,15 @@ def update_notebook_definition( The name or ID of the workspace. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. + format : str, default=None + If 'ipynb' is provided than notebook_content should be standard ipynb format + otherwise notebook_content should be GIT friendly format """ (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) - notebook_payload = base64.b64encode(notebook_content) + notebook_payload = base64.b64encode(notebook_content.encode("utf-8")).decode( + "utf-8" + ) item_id = resolve_item_id(item=name, type="Notebook", workspace=workspace) type = _get_notebook_type(notebook_name=name, workspace=workspace) @@ -242,7 +262,7 @@ def update_notebook_definition( "definition": { "parts": [ { - "path": f"{_notebook_prefix}.{type}", + "path": f"{_notebook_prefix}{type}", "payload": notebook_payload, "payloadType": "InlineBase64", } @@ -250,6 +270,9 @@ def update_notebook_definition( }, } + if format == "ipynb": + payload["definition"]["format"] = "ipynb" + _base_api( request=f"v1/workspaces/{workspace_id}/notebooks/{item_id}/updateDefinition", payload=payload, diff --git a/src/sempy_labs/_one_lake_integration.py b/src/sempy_labs/_one_lake_integration.py index 87b7e200..ad8fca27 100644 --- a/src/sempy_labs/_one_lake_integration.py +++ b/src/sempy_labs/_one_lake_integration.py @@ -5,6 +5,7 @@ from sempy_labs._helper_functions import ( resolve_workspace_name_and_id, resolve_dataset_name_and_id, + resolve_workspace_id, ) import sempy_labs._icons as icons from uuid import UUID @@ -43,7 +44,7 @@ def export_model_to_onelake( destination_workspace = workspace_name destination_workspace_id = workspace_id else: - destination_workspace_id = fabric.resolve_workspace_id(destination_workspace) + destination_workspace_id = resolve_workspace_id(workspace=destination_workspace) tmsl = f""" {{ diff --git a/src/sempy_labs/_semantic_models.py b/src/sempy_labs/_semantic_models.py index 
b5a80404..8759f6ba 100644 --- a/src/sempy_labs/_semantic_models.py +++ b/src/sempy_labs/_semantic_models.py @@ -1,5 +1,5 @@ from uuid import UUID -from typing import Optional +from typing import Optional, List import pandas as pd from sempy_labs._helper_functions import ( _create_dataframe, @@ -7,8 +7,10 @@ _update_dataframe_datatypes, resolve_workspace_name_and_id, resolve_dataset_name_and_id, + delete_item, ) import sempy_labs._icons as icons +import re def get_semantic_model_refresh_schedule( @@ -69,7 +71,9 @@ def get_semantic_model_refresh_schedule( def enable_semantic_model_scheduled_refresh( - dataset: str | UUID, workspace: Optional[str | UUID] = None, enable: bool = True, + dataset: str | UUID, + workspace: Optional[str | UUID] = None, + enable: bool = True, ): """ Enables the scheduled refresh for the specified dataset from the specified workspace. @@ -91,18 +95,18 @@ def enable_semantic_model_scheduled_refresh( (dataset_name, dataset_id) = resolve_dataset_name_and_id(dataset, workspace) df = get_semantic_model_refresh_schedule(dataset=dataset, workspace=workspace) - status = df['Enabled'].iloc[0] + status = df["Enabled"].iloc[0] if enable and status: - print(f"{icons.info} Scheduled refresh for the '{dataset_name}' within the '{workspace_name}' workspace is already enabled.") + print( + f"{icons.info} Scheduled refresh for the '{dataset_name}' within the '{workspace_name}' workspace is already enabled." + ) elif not enable and not status: - print(f"{icons.info} Scheduled refresh for the '{dataset_name}' within the '{workspace_name}' workspace is already disabled.") + print( + f"{icons.info} Scheduled refresh for the '{dataset_name}' within the '{workspace_name}' workspace is already disabled." + ) else: - payload = { - "value": { - "enabled": enable - } - } + payload = {"value": {"enabled": enable}} _base_api( request=f"/v1.0/myorg/groups/{workspace_id}/datasets/{dataset_id}/refreshSchedule", @@ -110,4 +114,116 @@ def enable_semantic_model_scheduled_refresh( payload=payload, ) - print(f"{icons.green_dot} Scheduled refresh for the '{dataset_name}' within the '{workspace_name}' workspace has been enabled.") + print( + f"{icons.green_dot} Scheduled refresh for the '{dataset_name}' within the '{workspace_name}' workspace has been enabled." + ) + + +def delete_semantic_model(dataset: str | UUID, workspace: Optional[str | UUID] = None): + """ + Deletes a semantic model. + + This is a wrapper function for the following API: `Items - Delete Semantic Model `_. + + Parameters + ---------- + dataset: str | uuid.UUID + Name or ID of the semantic model. + workspace : str | uuid.UUID, default=None + The Fabric workspace name or ID. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + """ + + delete_item(item=dataset, type="SemanticModel", workspace=workspace) + + +def update_semantic_model_refresh_schedule( + dataset: str | UUID, + days: Optional[str | List[str]] = None, + times: Optional[str | List[str]] = None, + time_zone: Optional[str] = None, + workspace: Optional[str | UUID] = None, +): + """ + Updates the refresh schedule for the specified dataset from the specified workspace. + + This is a wrapper function for the following API: `Datasets - Update Refresh Schedule In Group `_. + + Parameters + ---------- + dataset : str | uuid.UUID + Name or ID of the semantic model. + days : str | list[str], default=None + The days of the week to refresh the dataset. 
+ Valid values are: "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday". + Defaults to None which means the refresh schedule will not be updated. + times : str | list[str], default=None + The times of the day to refresh the dataset. + Valid format is "HH:MM" (24-hour format). + Defaults to None which means the refresh schedule will not be updated. + time_zone : str, default=None + The time zone to use for the refresh schedule. + Defaults to None which means the refresh schedule will not be updated. + workspace : str | uuid.UUID, default=None + The workspace name or ID. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + """ + + (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) + (dataset_name, dataset_id) = resolve_dataset_name_and_id(dataset, workspace) + + payload = {"value": {}} + + def is_valid_time_format(time_str): + pattern = r"^(?:[01]\d|2[0-3]):[0-5]\d$" + return re.match(pattern, time_str) is not None + + weekdays = [ + "Monday", + "Tuesday", + "Wednesday", + "Thursday", + "Friday", + "Sunday", + "Saturday", + ] + if days: + if isinstance(days, str): + days = [days] + for i in range(len(days)): + days[i] = days[i].capitalize() + if days[i] not in weekdays: + raise ValueError( + f"{icons.red_dot} Invalid day '{days[i]}'. Valid days are: {weekdays}" + ) + payload["value"]["days"] = days + if times: + if isinstance(times, str): + times = [times] + for i in range(len(times)): + if not is_valid_time_format(times[i]): + raise ValueError( + f"{icons.red_dot} Invalid time '{times[i]}'. Valid time format is 'HH:MM' (24-hour format)." + ) + payload["value"]["times"] = times + if time_zone: + payload["value"]["localTimeZoneId"] = time_zone + + if not payload.get("value"): + print( + f"{icons.info} No changes were made to the refresh schedule for the '{dataset_name}' within the '{workspace_name}' workspace." + ) + return + + _base_api( + request=f"/v1.0/myorg/groups/{workspace_id}/datasets/{dataset_id}/refreshSchedule", + method="patch", + client="fabric_sp", + payload=payload, + ) + + print( + f"{icons.green_dot} Refresh schedule for the '{dataset_name}' within the '{workspace_name}' workspace has been updated." 
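# --- Hedged usage sketch (not part of the patch) -----------------------------
# Calling update_semantic_model_refresh_schedule as defined above. Dataset and
# workspace names are placeholders; weekdays are capitalized and times are
# validated against 24-hour "HH:MM" inside the function.
from sempy_labs._semantic_models import update_semantic_model_refresh_schedule

update_semantic_model_refresh_schedule(
    dataset="Sales Model",
    days=["monday", "Wednesday", "friday"],   # normalized to capitalized names
    times=["06:30", "18:30"],
    time_zone="Central Standard Time",
    workspace="Sales Workspace",
)
# -----------------------------------------------------------------------------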
+ ) diff --git a/src/sempy_labs/_sql.py b/src/sempy_labs/_sql.py index 0d64d7ab..de3fd33a 100644 --- a/src/sempy_labs/_sql.py +++ b/src/sempy_labs/_sql.py @@ -34,7 +34,7 @@ def _bytes2mswin_bstr(value: bytes) -> bytes: class ConnectBase: def __init__( self, - item: str, + item: str | UUID, workspace: Optional[Union[str, UUID]] = None, timeout: Optional[int] = None, endpoint_type: str = "warehouse", @@ -45,22 +45,34 @@ def __init__( (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) # Resolve the appropriate ID and name (warehouse or lakehouse) - if endpoint_type == "warehouse": + if endpoint_type == "sqldatabase": + # SQLDatabase is has special case for resolving the name and id (resource_name, resource_id) = resolve_item_name_and_id( - item=item, type=endpoint_type.capitalize(), workspace=workspace_id + item=item, type="SQLDatabase", workspace=workspace_id ) - else: + elif endpoint_type == "lakehouse": (resource_name, resource_id) = resolve_lakehouse_name_and_id( - lakehouse=item, workspace=workspace_id + lakehouse=item, + workspace=workspace_id, + ) + else: + (resource_name, resource_id) = resolve_item_name_and_id( + item=item, workspace=workspace_id, type=endpoint_type.capitalize() ) + endpoint_for_url = ( + "sqlDatabases" if endpoint_type == "sqldatabase" else f"{endpoint_type}s" + ) + # Get the TDS endpoint response = _base_api( - request=f"v1/workspaces/{workspace_id}/{endpoint_type}s/{resource_id}" + request=f"v1/workspaces/{workspace_id}/{endpoint_for_url}/{resource_id}" ) if endpoint_type == "warehouse": tds_endpoint = response.json().get("properties", {}).get("connectionString") + elif endpoint_type == "sqldatabase": + tds_endpoint = response.json().get("properties", {}).get("serverFqdn") else: tds_endpoint = ( response.json() @@ -70,9 +82,12 @@ def __init__( ) # Set up the connection string - access_token = SynapseTokenProvider()() + access_token = SynapseTokenProvider()("sql") tokenstruct = _bytes2mswin_bstr(access_token.encode()) - conn_str = f"DRIVER={{ODBC Driver 18 for SQL Server}};SERVER={tds_endpoint};DATABASE={resource_name};Encrypt=Yes;" + if endpoint_type == "sqldatabase": + conn_str = f"DRIVER={{ODBC Driver 18 for SQL Server}};SERVER={tds_endpoint};DATABASE={resource_name}-{resource_id};Encrypt=Yes;" + else: + conn_str = f"DRIVER={{ODBC Driver 18 for SQL Server}};SERVER={tds_endpoint};DATABASE={resource_name};Encrypt=Yes;" if timeout is not None: conn_str += f"Connect Timeout={timeout};" @@ -141,10 +156,24 @@ def close(self): class ConnectWarehouse(ConnectBase): def __init__( self, - warehouse: str, + warehouse: str | UUID, workspace: Optional[Union[str, UUID]] = None, - timeout: Optional[int] = None, + timeout: int = 30, ): + """ + Run a SQL or T-SQL query against a Fabric Warehouse. + + Parameters + ---------- + warehouse : str | uuid.UUID + The name or ID of the Fabric warehouse. + workspace : str | uuid.UUID, default=None + The name or ID of the workspace. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + timeout : int, default=30 + The timeout for the connection in seconds. 
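
A hedged sketch of querying a warehouse with the updated class; the warehouse, workspace, and table names are placeholders, and the context-manager/query pattern mirrors how ConnectSQLDatabase is used later in this patch:

from sempy_labs._sql import ConnectWarehouse

# timeout is now an int defaulting to 30 seconds.
with ConnectWarehouse(warehouse="WH_Sales", workspace="Analytics", timeout=60) as sql:
    df = sql.query("SELECT TOP 10 * FROM dbo.FactSales")  # placeholder table
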
+ """ super().__init__( item=warehouse, workspace=workspace, @@ -156,13 +185,57 @@ def __init__( class ConnectLakehouse(ConnectBase): def __init__( self, - lakehouse: str, + lakehouse: Optional[str | UUID] = None, workspace: Optional[Union[str, UUID]] = None, - timeout: Optional[int] = None, + timeout: int = 30, ): + """ + Run a SQL or T-SQL query against a Fabric lakehouse. + + Parameters + ---------- + lakehouse : str | uuid.UUID, default=None + The name or ID of the Fabric lakehouse. + Defaults to None which resolves to the lakehouse attached to the notebook. + workspace : str | uuid.UUID, default=None + The name or ID of the workspace. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + timeout : int, default=30 + The timeout for the connection in seconds. + """ super().__init__( item=lakehouse, workspace=workspace, timeout=timeout, endpoint_type="lakehouse", ) + + +class ConnectSQLDatabase(ConnectBase): + def __init__( + self, + sql_database: str | UUID, + workspace: Optional[Union[str, UUID]] = None, + timeout: int = 30, + ): + """ + Run a SQL or T-SQL query against a Fabric SQL database. + + Parameters + ---------- + sql_database : str | uuid.UUID + The name or ID of the Fabric SQL database. + workspace : str | uuid.UUID, default=None + The name or ID of the workspace. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + timeout : int, default=30 + The timeout for the connection in seconds. + """ + super().__init__( + item=sql_database, + workspace=workspace, + timeout=timeout, + endpoint_type="sqldatabase", + ) diff --git a/src/sempy_labs/_sqldatabase.py b/src/sempy_labs/_sqldatabase.py new file mode 100644 index 00000000..e7bb97cd --- /dev/null +++ b/src/sempy_labs/_sqldatabase.py @@ -0,0 +1,188 @@ +from sempy_labs._helper_functions import ( + resolve_workspace_name_and_id, + _base_api, + _create_dataframe, + _update_dataframe_datatypes, + create_item, + delete_item, +) +import pandas as pd +from typing import Optional +from uuid import UUID + + +def create_sql_database( + name: str, description: Optional[str] = None, workspace: Optional[str | UUID] = None +): + """ + Creates a SQL database. + + This is a wrapper function for the following API: `Items - Create SQL Database `_. + + Parameters + ---------- + name: str + Name of the SQL database. + description : str, default=None + A description of the SQL database. + workspace : str | uuid.UUID, default=None + The Fabric workspace name or ID. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + """ + + create_item( + name=name, description=description, type="SQLDatabase", workspace=workspace + ) + + +def delete_sql_database( + sql_database: str | UUID, workspace: Optional[str | UUID] = None +): + """ + Deletes a SQL Database. + + This is a wrapper function for the following API: `Items - Delete SQL Database `_. + + Parameters + ---------- + sql_database: str | uuid.UUID + Name of the SQL database. + workspace : str | uuid.UUID, default=None + The Fabric workspace name or ID. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. 
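
A short lifecycle sketch for the new SQL database helpers; item and workspace names are illustrative only:

from sempy_labs._sqldatabase import create_sql_database, delete_sql_database

create_sql_database(name="SalesDB", description="Demo database", workspace="Analytics")
# ... work with the database, then remove it by name or ID:
delete_sql_database(sql_database="SalesDB", workspace="Analytics")
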
+ """ + + delete_item(item=sql_database, type="SQLDatabase", workspace=workspace) + + +def list_sql_databases(workspace: Optional[str | UUID] = None) -> pd.DataFrame: + """ + Lists all SQL databases in the Fabric workspace. + + This is a wrapper function for the following API: `Items - List SQL Databases `_. + + Service Principal Authentication is supported (see `here `_ for examples). + + Parameters + ---------- + workspace : str | uuid.UUID, default=None + The Fabric workspace name or ID. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + + Returns + ------- + pandas.DataFrame + A pandas dataframe showing a list of SQL databases in the Fabric workspace. + """ + + (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) + + columns = { + "SQL Database Name": "string", + "SQL Database Id": "string", + "Description": "string", + "Connection Info": "string", + "Database Name": "string", + "Server FQDN": "string", + } + df = _create_dataframe(columns=columns) + + responses = _base_api( + request=f"/v1/workspaces/{workspace_id}/SQLDatabases", + uses_pagination=True, + client="fabric_sp", + ) + + for r in responses: + for v in r.get("value", []): + prop = v.get("properties", {}) + new_data = { + "SQL Database Name": v.get("displayName"), + "SQL Database Id": v.get("id"), + "Description": v.get("description"), + "Connection Info": prop.get("connectionInfo"), + "Database Name": prop.get("databaseName"), + "Server FQDN": prop.get("serverFqdn"), + } + + df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + + _update_dataframe_datatypes(dataframe=df, column_map=columns) + + return df + + +def get_sql_database_tables( + sql_database: str | UUID, workspace: Optional[str | UUID] = None +) -> pd.DataFrame: + """ + Shows a list of the tables in the Fabric SQLDabatse. This function is based on INFORMATION_SCHEMA.TABLES. + + Parameters + ---------- + sql_database : str | uuid.UUID + Name or ID of the Fabric SQLDabatase. + workspace : str | uuid.UUID, default=None + The Fabric workspace name or ID. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + + Returns + ------- + pandas.DataFrame + A pandas dataframe showing a list of the tables in the Fabric SQLDabatase. + """ + + from sempy_labs._sql import ConnectSQLDatabase + + with ConnectSQLDatabase(sql_database=sql_database, workspace=workspace) as sql: + df = sql.query( + """ + SELECT TABLE_SCHEMA AS [Schema], TABLE_NAME AS [Table Name], TABLE_TYPE AS [Table Type] + FROM INFORMATION_SCHEMA.TABLES + WHERE TABLE_TYPE = 'BASE TABLE' + """ + ) + + return df + + +def get_sql_database_columns( + sql_database: str | UUID, workspace: Optional[str | UUID] = None +) -> pd.DataFrame: + """ + Shows a list of the columns in each table within the Fabric SQLDabatase. This function is based on INFORMATION_SCHEMA.COLUMNS. + + Parameters + ---------- + sql_database : str | uuid.UUID + Name or ID of the Fabric SQLDabatase. + workspace : str | uuid.UUID, default=None + The Fabric workspace name or ID. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + + Returns + ------- + pandas.DataFrame + A pandas dataframe showing a list of the columns in each table within the Fabric SQLDabatase. 
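
A hedged sketch of the new introspection helpers; the database and workspace names are placeholders:

from sempy_labs._sqldatabase import (
    list_sql_databases,
    get_sql_database_tables,
    get_sql_database_columns,
)

databases = list_sql_databases(workspace="Analytics")  # one row per SQL database in the workspace
tables = get_sql_database_tables("SalesDB", workspace="Analytics")
columns = get_sql_database_columns("SalesDB", workspace="Analytics")
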
+ """ + + from sempy_labs._sql import ConnectSQLDatabase + + with ConnectSQLDatabase(sql_database=sql_database, workspace=workspace) as sql: + df = sql.query( + """ + SELECT t.TABLE_SCHEMA AS [Schema], t.TABLE_NAME AS [Table Name], c.COLUMN_NAME AS [Column Name], c.DATA_TYPE AS [Data Type], c.IS_NULLABLE AS [Is Nullable], c.CHARACTER_MAXIMUM_LENGTH AS [Character Max Length] + FROM INFORMATION_SCHEMA.TABLES AS t + LEFT JOIN INFORMATION_SCHEMA.COLUMNS AS c + ON t.TABLE_NAME = c.TABLE_NAME + AND t.TABLE_SCHEMA = c.TABLE_SCHEMA + WHERE t.TABLE_TYPE = 'BASE TABLE' + """ + ) + + return df diff --git a/src/sempy_labs/_tags.py b/src/sempy_labs/_tags.py new file mode 100644 index 00000000..de9e7ec5 --- /dev/null +++ b/src/sempy_labs/_tags.py @@ -0,0 +1,194 @@ +from sempy_labs._helper_functions import ( + _base_api, + _create_dataframe, + _update_dataframe_datatypes, + resolve_item_name_and_id, + resolve_workspace_name_and_id, + _is_valid_uuid, +) +import pandas as pd +from typing import Optional, List +from uuid import UUID +import sempy_labs._icons as icons + + +def list_tags() -> pd.DataFrame: + """ + Shows a list of all the tenant's tags. + + This is a wrapper function for the following API: `Tags - List Tags `_. + + Service Principal Authentication is supported (see `here `_ for examples). + + Returns + ------- + pandas.DataFrame + A pandas dataframe showing a list of all the tenant's tags. + """ + + columns = { + "Tag Name": "string", + "Tag Id": "string", + } + df = _create_dataframe(columns=columns) + + responses = _base_api( + request="/v1/tags", + uses_pagination=True, + client="fabric_sp", + ) + + dfs = [] + + for r in responses: + for v in r.get("value", []): + new_data = { + "Tag Name": v.get("displayName"), + "Tag Id": v.get("id"), + } + dfs.append(pd.DataFrame(new_data, index=[0])) + + if dfs: + df = pd.concat(dfs, ignore_index=True) + _update_dataframe_datatypes(dataframe=df, column_map=columns) + + return df + + +def resolve_tags(tags: str | List[str]) -> List[str]: + """ + Resolves the tags to a list of strings. + + Parameters + ---------- + tags : str | List[str] + The tags to resolve. + + Returns + ------- + List[str] + A list of resolved tags. + """ + + if isinstance(tags, str): + tags = [tags] + + if all(_is_valid_uuid(tag) for tag in tags): + return tags + + df = list_tags() + + tag_list = [] + for tag in tags: + if _is_valid_uuid(tag): + tag_list.append(tag) + else: + df_filt = df[df["Tag Name"] == tag] + if df_filt.empty: + raise ValueError(f"Tag '{tag}' not found in the tenant's tags.") + tag_id = df_filt["Tag Id"].iloc[0] + tag_list.append(tag_id) + + return tag_list + + +def apply_tags( + item: str | UUID, + type: str, + tags: str | UUID | List[str | UUID], + workspace: Optional[str | UUID] = None, +): + """ + Shows a list of all the tenant's tags. + + This is a wrapper function for the following API: `Tags - Apply Tags `_. + + Service Principal Authentication is supported (see `here `_ for examples). + + Parameters + ---------- + item : str | uuid.UUID + The name or ID of the item to apply tags to. + type : str + The type of the item to apply tags to. For example: "Lakehouse". + tags : str | uuid.UUID | List[str | uuid.UUID] + The name or ID of the tag(s) to apply to the item. + workspace : str | uuid.UUID, default=None + The workspace name or ID. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. 
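
A minimal sketch of the new tagging helpers; the tag, item, and workspace names are assumptions for illustration:

from sempy_labs._tags import resolve_tags, apply_tags

tag_ids = resolve_tags(["Certified", "Finance"])  # tag names are resolved to tag IDs via list_tags()
apply_tags(
    item="SalesLakehouse",
    type="Lakehouse",
    tags=["Certified", "Finance"],
    workspace="Analytics",
)
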
+    """
+
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+    (item_name, item_id) = resolve_item_name_and_id(item, type, workspace_id)
+
+    if isinstance(tags, str):
+        tags = [tags]
+
+    tag_list = resolve_tags(tags)
+
+    payload = {
+        "tags": tag_list,
+    }
+
+    _base_api(
+        request=f"/v1/workspaces/{workspace_id}/items/{item_id}/applyTags",
+        client="fabric_sp",
+        method="post",
+        payload=payload,
+    )
+
+    print(
+        f"{icons.green_dot} Tags {tags} applied to the '{item_name}' {type.lower()} within the '{workspace_name}' workspace"
+    )
+
+
+def unapply_tags(
+    item: str | UUID,
+    type: str,
+    tags: str | UUID | List[str | UUID],
+    workspace: Optional[str | UUID] = None,
+):
+    """
+    Unapplies one or more tags from an item.
+
+    This is a wrapper function for the following API: `Tags - Unapply Tags `_.
+
+    Service Principal Authentication is supported (see `here `_ for examples).
+
+    Parameters
+    ----------
+    item : str | uuid.UUID
+        The name or ID of the item to unapply tags from.
+    type : str
+        The type of the item to unapply tags from. For example: "Lakehouse".
+    tags : str | uuid.UUID | List[str | uuid.UUID]
+        The name or ID of the tag(s) to unapply from the item.
+    workspace : str | uuid.UUID, default=None
+        The workspace name or ID.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    """
+
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+    (item_name, item_id) = resolve_item_name_and_id(item, type, workspace_id)
+
+    if isinstance(tags, str):
+        tags = [tags]
+
+    tag_list = resolve_tags(tags)
+
+    payload = {
+        "tags": tag_list,
+    }
+
+    _base_api(
+        request=f"/v1/workspaces/{workspace_id}/items/{item_id}/unapplyTags",
+        client="fabric_sp",
+        method="post",
+        payload=payload,
+    )
+
+    print(
+        f"{icons.green_dot} Tags {tags} unapplied from the '{item_name}' {type.lower()} within the '{workspace_name}' workspace"
+    )
diff --git a/src/sempy_labs/_user_delegation_key.py b/src/sempy_labs/_user_delegation_key.py
new file mode 100644
index 00000000..fd105eb1
--- /dev/null
+++ b/src/sempy_labs/_user_delegation_key.py
@@ -0,0 +1,42 @@
+from sempy_labs.lakehouse._blobs import _request_blob_api
+from sempy_labs._helper_functions import (
+    _xml_to_dict,
+)
+from datetime import datetime, timedelta, timezone
+import xml.etree.ElementTree as ET
+
+
+def get_user_delegation_key():
+    """
+    Gets a key that can be used to sign a user delegation SAS (shared access signature). A user delegation SAS grants access to Azure Blob Storage resources by using Microsoft Entra credentials.
+
+    This is a wrapper function for the following API: `Get User Delegation Key `_.
+
+    Returns
+    -------
+    str
+        The user delegation key value.
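
A minimal sketch; the returned value is the delegation key string extracted from the XML response:

from sempy_labs._user_delegation_key import get_user_delegation_key

key = get_user_delegation_key()  # valid from roughly 2 minutes from now for 60 minutes, per the payload below
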
+ """ + + utc_now = datetime.now(timezone.utc) + start_time = utc_now + timedelta(minutes=2) + expiry_time = start_time + timedelta(minutes=60) + start_str = start_time.strftime("%Y-%m-%dT%H:%M:%SZ") + expiry_str = expiry_time.strftime("%Y-%m-%dT%H:%M:%SZ") + + payload = f""" + + {start_str} + {expiry_str} + """ + + response = _request_blob_api( + request="?restype=service&comp=userdelegationkey", + method="post", + payload=payload, + ) + + root = ET.fromstring(response.content) + response_json = _xml_to_dict(root) + + return response_json.get("UserDelegationKey", {}).get("Value", None) diff --git a/src/sempy_labs/_utils.py b/src/sempy_labs/_utils.py new file mode 100644 index 00000000..b651a3dc --- /dev/null +++ b/src/sempy_labs/_utils.py @@ -0,0 +1,42 @@ +item_types = { + "Dashboard": ["Dashboard", "dashboards"], + "DataPipeline": ["Data Pipeline", "dataPipelines", "pipeline-content.json"], + "Datamart": ["Datamart", "datamarts"], + "Environment": ["Environment", "environments"], + "Eventhouse": ["Eventhouse", "eventhouses", "EventhouseProperties.json"], + "Eventstream": ["Eventstream", "eventstreams", "eventstream.json"], + "GraphQLApi": ["GraphQL Api", "GraphQLApis"], + "KQLDashboard": ["KQL Dashboard", "kqlDashboards", "RealTimeDashboard.json"], + "KQLDatabase": [ + "KQL Database", + "kqlDatabases", + ], # "DatabaseProperties.json", "DatabaseSchema.kql" + "KQLQueryset": ["KQL Queryset", "kqlQuerysets", "RealTimeQueryset.json"], + "Lakehouse": ["Lakehouse", "lakehouses"], + "MLExperiment": ["ML Experiment", "mlExperiments"], + "MLModel": ["ML Model", "mlModels"], + "MirroredDatabase": [ + "Mirrored Database", + "mirroredDatabases", + "mirroredDatabase.json", + ], + "MirroredWarehouse": ["Mirrored Warehouse", "mirroredWarehouses"], + "MountedDataFactory": [ + "Mounted Data Factory", + "mountedDataFactories", + "mountedDataFactory-content.json", + ], + "Notebook": ["Notebook", "notebooks"], + "PaginatedReport": ["Paginated Report", "paginatedReports"], + "Reflex": ["Reflex", "reflexes", "ReflexEntities.json"], + "Report": ["Report", "reports", "report.json"], + "SQLDatabase": ["SQL Database", "sqlDatabases"], + "SQLEndpoint": ["SQL Endpoint", "sqlEndpoints"], + "SemanticModel": ["Semantic Model", "semanticModels", "model.bim"], + "SparkJobDefinition": [ + "Spark Job Definition", + "sparkJobDefinitions", + "SparkJobDefinitionV1.json", + ], + "Warehouse": ["Warehouse", "warehouses"], +} diff --git a/src/sempy_labs/_variable_libraries.py b/src/sempy_labs/_variable_libraries.py new file mode 100644 index 00000000..02f026bb --- /dev/null +++ b/src/sempy_labs/_variable_libraries.py @@ -0,0 +1,89 @@ +from sempy_labs._helper_functions import ( + resolve_workspace_name_and_id, + resolve_workspace_id, + _base_api, + _create_dataframe, + _update_dataframe_datatypes, + delete_item, +) +import pandas as pd +from typing import Optional +from uuid import UUID + + +def list_variable_libraries(workspace: Optional[str | UUID] = None) -> pd.DataFrame: + """ + Shows the variable libraries within a workspace. + + This is a wrapper function for the following API: `Items - List Variable Libraries `_. + + Service Principal Authentication is supported (see `here `_ for examples). + + Parameters + ---------- + workspace : str | uuid.UUID, default=None + The Fabric workspace name or ID. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. 
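
A hedged usage sketch; the workspace name is a placeholder:

from sempy_labs._variable_libraries import list_variable_libraries

df = list_variable_libraries(workspace="Analytics")
# Columns include 'Variable Library Name', 'Variable Library Id', 'Description',
# and 'Active Value Set Name'.
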
+ + Returns + ------- + pandas.DataFrame + A pandas dataframe showing the variable libraries within a workspace. + """ + + columns = { + "Variable Library Name": "string", + "Variable Library Id": "string", + "Description": "string", + "Active Value Set Name": "string", + } + df = _create_dataframe(columns=columns) + + workspace_id = resolve_workspace_id(workspace) + + responses = _base_api( + request=f"/v1/workspaces/{workspace_id}/VariableLibraries", + uses_pagination=True, + client="fabric_sp", + ) + + dfs = [] + for r in responses: + for v in r.get("value", []): + prop = v.get("properties", {}) + + new_data = { + "Variable Library Name": v.get("displayName"), + "Variable Library Id": v.get("id"), + "Description": v.get("description"), + "Active Value Set Name": prop.get("activeValueSetName"), + } + dfs.append(pd.DataFrame(new_data, index=[0])) + + if dfs: + df = pd.concat(dfs, ignore_index=True) + _update_dataframe_datatypes(dataframe=df, column_map=columns) + + return df + + +def delete_variable_library( + variable_library: str | UUID, workspace: Optional[str | UUID] = None +): + """ + Deletes a variable library. + + This is a wrapper function for the following API: `Items - Delete Variable Library `_. + + Parameters + ---------- + navariable_libraryme: str | uuid.UUID + Name or ID of the variable library. + workspace : str | uuid.UUID, default=None + The Fabric workspace name or ID. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + """ + + delete_item(item=variable_library, type="VariableLibrary", workspace=workspace) diff --git a/src/sempy_labs/_vertipaq.py b/src/sempy_labs/_vertipaq.py index 1f143040..62210b8c 100644 --- a/src/sempy_labs/_vertipaq.py +++ b/src/sempy_labs/_vertipaq.py @@ -8,17 +8,17 @@ import warnings from sempy_labs._helper_functions import ( format_dax_object_name, - resolve_lakehouse_name, save_as_delta_table, resolve_workspace_capacity, _get_column_aggregate, resolve_workspace_name_and_id, resolve_dataset_name_and_id, _create_spark_session, + resolve_workspace_id, + resolve_workspace_name, ) from sempy_labs._list_functions import list_relationships, list_tables from sempy_labs.lakehouse import lakehouse_attached, get_lakehouse_tables -from sempy_labs.directlake import get_direct_lake_source from typing import Optional from sempy._utils._log import log import sempy_labs._icons as icons @@ -33,9 +33,11 @@ def vertipaq_analyzer( export: Optional[str] = None, read_stats_from_data: bool = False, **kwargs, -): +) -> dict[str, pd.DataFrame]: """ - Displays an HTML visualization of the Vertipaq Analyzer statistics from a semantic model. + Displays an HTML visualization of the `Vertipaq Analyzer `_ statistics from a semantic model. + + `Vertipaq Analyzer `_ is an open-sourced tool built by SQLBI. It provides a detailed analysis of the VertiPaq engine, which is the in-memory engine used by Power BI and Analysis Services Tabular models. Parameters ---------- @@ -51,6 +53,11 @@ def vertipaq_analyzer( Default value: None. read_stats_from_data : bool, default=False Setting this parameter to true has the function get Column Cardinality and Missing Rows using DAX (Direct Lake semantic models achieve this using a Spark query to the lakehouse). + + Returns + ------- + dict[str, pandas.DataFrame] + A dictionary of pandas dataframes showing the vertipaq analyzer statistics. 
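
A sketch of consuming the new return value; the dataset and workspace names are illustrative:

from sempy_labs._vertipaq import vertipaq_analyzer

# When export is None the statistics are now also returned as a dict of DataFrames
# keyed by 'Model Summary', 'Tables', 'Partitions', 'Columns', 'Relationships', 'Hierarchies'.
vpa = vertipaq_analyzer(dataset="Sales Model", workspace="Analytics")
column_stats = vpa["Columns"]
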
""" from sempy_labs.tom import connect_semantic_model @@ -167,10 +174,12 @@ def vertipaq_analyzer( ) artifact_type = None - if is_direct_lake: - artifact_type, lakehouse_name, lakehouse_id, lakehouse_workspace_id = ( - get_direct_lake_source(dataset=dataset_id, workspace=workspace_id) - ) + lakehouse_workspace_id = None + lakehouse_name = None + # if is_direct_lake: + # artifact_type, lakehouse_name, lakehouse_id, lakehouse_workspace_id = ( + # get_direct_lake_source(dataset=dataset_id, workspace=workspace_id) + # ) dfR["Missing Rows"] = 0 dfR["Missing Rows"] = dfR["Missing Rows"].astype(int) @@ -189,8 +198,10 @@ def vertipaq_analyzer( & (~dfC["Column Name"].str.startswith("RowNumber-")) ] - object_workspace = fabric.resolve_workspace_name(lakehouse_workspace_id) - current_workspace_id = fabric.get_workspace_id() + object_workspace = resolve_workspace_name( + workspace_id=lakehouse_workspace_id + ) + current_workspace_id = resolve_workspace_id() if current_workspace_id != lakehouse_workspace_id: lakeTables = get_lakehouse_tables( lakehouse=lakehouse_name, workspace=object_workspace @@ -502,6 +513,14 @@ def _style_columns_based_on_types(dataframe: pd.DataFrame, column_type_mapping): if export is None: visualize_vertipaq(dfs) + return { + "Model Summary": export_Model, + "Tables": export_Table, + "Partitions": export_Part, + "Columns": export_Col, + "Relationships": export_Rel, + "Hierarchies": export_Hier, + } # Export vertipaq to delta tables in lakehouse if export in ["table", "zip"]: @@ -511,22 +530,15 @@ def _style_columns_based_on_types(dataframe: pd.DataFrame, column_type_mapping): ) if export == "table": - lakehouse_id = fabric.get_lakehouse_id() - lake_workspace = fabric.resolve_workspace_name() - lakehouse = resolve_lakehouse_name( - lakehouse_id=lakehouse_id, workspace=lake_workspace - ) lakeTName = "vertipaqanalyzer_model" - lakeT = get_lakehouse_tables(lakehouse=lakehouse, workspace=lake_workspace) + lakeT = get_lakehouse_tables() lakeT_filt = lakeT[lakeT["Table Name"] == lakeTName] if len(lakeT_filt) == 0: runId = 1 else: - max_run_id = _get_column_aggregate( - lakehouse=lakehouse, table_name=lakeTName - ) + max_run_id = _get_column_aggregate(table_name=lakeTName) runId = max_run_id + 1 dfMap = { diff --git a/src/sempy_labs/_vpax.py b/src/sempy_labs/_vpax.py new file mode 100644 index 00000000..33698140 --- /dev/null +++ b/src/sempy_labs/_vpax.py @@ -0,0 +1,388 @@ +import sempy +import re +from urllib.parse import urlparse +import sempy.fabric as fabric +import sys +from pathlib import Path +from typing import Optional +from uuid import UUID +from sempy_labs._helper_functions import ( + resolve_workspace_name_and_id, + resolve_dataset_name_and_id, + resolve_lakehouse_name_and_id, + _mount, + _get_column_aggregate, + resolve_item_type, + file_exists, + create_abfss_path_from_path, +) +from sempy._utils._log import log +import sempy_labs._icons as icons +import zipfile +import requests + + +VPA_VERSION = "1.10.0" +NUGET_BASE_URL = "https://www.nuget.org/api/v2/package" +ASSEMBLIES = [ + "Dax.Metadata", + "Dax.Model.Extractor", + "Dax.ViewVpaExport", + "Dax.Vpax", +] + +_vpa_initialized = False +current_dir = Path(__file__).parent +nuget_dir = current_dir / "nuget_dlls" + + +def find_lib_folder(pkg_folder: Path) -> Path: + lib_base = pkg_folder / "lib" + if not lib_base.exists(): + raise FileNotFoundError(f"No 'lib' directory in package {pkg_folder}") + + # Prefer netstandard2.0 if available + candidates = sorted(lib_base.iterdir()) + for preferred in ["netstandard2.0", "net6.0", 
"net5.0", "netcoreapp3.1", "net472"]: + if (lib_base / preferred).exists(): + return lib_base / preferred + + # Fallback: first available folder + for candidate in candidates: + if candidate.is_dir(): + return candidate + + raise FileNotFoundError(f"No usable framework folder found in {lib_base}") + + +def download_and_extract_package( + package_name: str, version: str, target_dir: Path +) -> Path: + nupkg_url = f"{NUGET_BASE_URL}/{package_name}/{version}" + nupkg_path = target_dir / f"{package_name}.{version}.nupkg" + + if not nupkg_path.exists(): + r = requests.get(nupkg_url) + r.raise_for_status() + target_dir.mkdir(parents=True, exist_ok=True) + with open(nupkg_path, "wb") as f: + f.write(r.content) + + extract_path = target_dir / f"{package_name}_{version}" + if not extract_path.exists(): + with zipfile.ZipFile(nupkg_path, "r") as zip_ref: + zip_ref.extractall(extract_path) + return extract_path + + +def download_and_load_nuget_package( + package_name, version, target_dir: Path = None, load_assembly=True +): + + from System.Reflection import Assembly + + if target_dir is None: + target_dir = nuget_dir + + # Download and extract + pkg_folder = download_and_extract_package(package_name, version, target_dir) + lib_folder = find_lib_folder(pkg_folder) + + dll_path = lib_folder / f"{package_name}.dll" + if not dll_path.exists(): + raise FileNotFoundError(f"{dll_path} not found") + + sys.path.append(str(lib_folder)) + if load_assembly: + Assembly.LoadFile(str(dll_path)) + + +def init_vertipaq_analyzer(): + global _vpa_initialized + if _vpa_initialized: + return + + from clr_loader import get_coreclr + from pythonnet import set_runtime + + # Load the runtime and set it BEFORE importing clr + runtime_config_path = current_dir / "dotnet_lib" / "dotnet.runtime.config.json" + rt = get_coreclr(runtime_config=str(runtime_config_path)) + set_runtime(rt) + + sempy.fabric._client._utils._init_analysis_services() + + from System.Reflection import Assembly + + for name in ASSEMBLIES: + download_and_load_nuget_package( + name, VPA_VERSION, nuget_dir, load_assembly=False + ) + + download_and_load_nuget_package("Newtonsoft.Json", "13.0.1") + download_and_load_nuget_package("System.IO.Packaging", "7.0.0") + + # For some reason I have to load these after and not inside the download_and_load_nuget_package function + dll_paths = [ + f"{nuget_dir}/Dax.Model.Extractor_1.10.0/lib/net6.0/Dax.Model.Extractor.dll", + f"{nuget_dir}/Dax.Metadata_1.10.0/lib/netstandard2.0/Dax.Metadata.dll", + f"{nuget_dir}/Dax.ViewVpaExport_1.10.0/lib/netstandard2.0/Dax.ViewVpaExport.dll", + f"{nuget_dir}/Dax.Vpax_1.10.0/lib/net6.0/Dax.Vpax.dll", + ] + for dll_path in dll_paths: + Assembly.LoadFile(dll_path) + + _vpa_initialized = True + + +@log +def create_vpax( + dataset: str | UUID, + workspace: Optional[str | UUID] = None, + lakehouse: Optional[str | UUID] = None, + lakehouse_workspace: Optional[str | UUID] = None, + file_path: Optional[str] = None, + read_stats_from_data: bool = False, + read_direct_query_stats: bool = False, + direct_lake_stats_mode: str = "ResidentOnly", + overwrite: bool = False, +): + """ + Creates a .vpax file for a semantic model and saves it to a lakehouse. This is based on `SQL BI's VertiPaq Analyzer `_. + + Parameters + ---------- + dataset : str | uuid.UUID + Name or ID of the semantic model. + workspace : str | uuid.UUID, default=None + The workspace name or ID. 
+ Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + lakehouse : str | uuid.UUID, default=None + The lakehouse name or ID. + Defaults to None which resolves to the attached lakehouse. + lakehouse_workspace : str | uuid.UUID, default=None + The workspace name or ID of the lakehouse. + Defaults to None which resolves to the workspace of the attached lakehouse. + file_path : str, default=None + The path where the .vpax file will be saved in the lakehouse. + Defaults to None which resolves to the dataset name. + read_stats_from_data : bool, default=False + Whether to read statistics from the data. + read_direct_query_stats : bool, default=False + Whether to analyze DirectQuery tables. + direct_lake_stats_mode : str, default='ResidentOnly' + The Direct Lake extraction mode. Options are 'ResidentOnly' or 'Full'. This parameter is ignored if read_stats_from_data is False. This parameter is only relevant for tables which use Direct Lake mode. + If set to 'ResidentOnly', column statistics are obtained only for the columns which are in memory. + If set to 'Full', column statistics are obtained for all columns - pending the proper identification of the Direct Lake source. + overwrite : bool, default=False + Whether to overwrite the .vpax file if it already exists in the lakehouse. + """ + + init_vertipaq_analyzer() + + import notebookutils + from Dax.Metadata import DirectLakeExtractionMode + from Dax.Model.Extractor import TomExtractor + from Dax.Vpax.Tools import VpaxTools + from Dax.ViewVpaExport import Model + from System.IO import MemoryStream, FileMode, FileStream, FileAccess, FileShare + + direct_lake_stats_mode = direct_lake_stats_mode.capitalize() + + (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) + (dataset_name, dataset_id) = resolve_dataset_name_and_id(dataset, workspace_id) + (lakehouse_workspace_name, lakehouse_workspace_id) = resolve_workspace_name_and_id( + lakehouse_workspace + ) + (lakehouse_name, lakehouse_id) = resolve_lakehouse_name_and_id( + lakehouse=lakehouse, workspace=lakehouse_workspace_id + ) + + local_path = _mount(lakehouse=lakehouse_id, workspace=lakehouse_workspace_id) + if file_path is None: + file_path = dataset_name + + if file_path.endswith(".vpax"): + file_path = file_path[:-5] + save_location = f"Files/{file_path}.vpax" + path = f"{local_path}/{save_location}" + + # Check if the .vpax file already exists in the lakehouse + if not overwrite: + new_path = create_abfss_path_from_path( + lakehouse_id, lakehouse_workspace_id, save_location + ) + if file_exists(new_path): + print( + f"{icons.warning} The {save_location} file already exists in the '{lakehouse_name}' lakehouse. Set overwrite=True to overwrite the file." 
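
A hedged sketch of calling the new exporter; all names are placeholders, and the file lands under Files/<file_path>.vpax in the target lakehouse:

from sempy_labs._vpax import create_vpax

create_vpax(
    dataset="Sales Model",
    workspace="Analytics",
    lakehouse="Monitoring",          # defaults to the attached lakehouse when omitted
    file_path="vpax/SalesModel",     # saved as Files/vpax/SalesModel.vpax
    read_stats_from_data=True,
    direct_lake_stats_mode="Full",   # 'ResidentOnly' (default) or 'Full'
    overwrite=True,
)
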
+ ) + return + + vpax_stream = MemoryStream() + extractor_app_name = "VPAX Notebook" + extractor_app_version = "1.0" + column_batch_size = 50 + token = notebookutils.credentials.getToken("pbi") + connection_string = f"data source=powerbi://api.powerbi.com/v1.0/myorg/{workspace_name};initial catalog={dataset_name};User ID=;Password={token};Persist Security Info=True;Impersonation Level=Impersonate" + + print(f"{icons.in_progress} Extracting .vpax metadata...") + + # Get stats for the model; for direct lake only get is_resident + dax_model = TomExtractor.GetDaxModel( + connection_string, + extractor_app_name, + extractor_app_version, + read_stats_from_data, + 0, + read_direct_query_stats, + DirectLakeExtractionMode.ResidentOnly, + column_batch_size, + ) + vpa_model = Model(dax_model) + tom_database = TomExtractor.GetDatabase(connection_string) + + # Calculate Direct Lake stats for columns which are IsResident=False + from sempy_labs.tom import connect_semantic_model + + with connect_semantic_model(dataset=dataset, workspace=workspace) as tom: + is_direct_lake = tom.is_direct_lake() + if read_stats_from_data and is_direct_lake and direct_lake_stats_mode == "Full": + + df_not_resident = fabric.evaluate_dax( + dataset=dataset, + workspace=workspace, + dax_string=""" SELECT [DIMENSION_NAME] AS [TableName], [ATTRIBUTE_NAME] AS [ColumnName] FROM $SYSTEM.DISCOVER_STORAGE_TABLE_COLUMNS WHERE NOT [ISROWNUMBER] AND NOT [DICTIONARY_ISRESIDENT]""", + ) + + import Microsoft.AnalysisServices.Tabular as TOM + + print(f"{icons.in_progress} Calculating Direct Lake statistics...") + + # For SQL endpoints (do once) + dfI = fabric.list_items(workspace=workspace) + # Get list of tables in Direct Lake mode which have columns that are not resident + tbls = [ + t + for t in tom.model.Tables + if t.Name in df_not_resident["TableName"].values + and any(p.Mode == TOM.ModeType.DirectLake for p in t.Partitions) + ] + for t in tbls: + column_cardinalities = {} + table_name = t.Name + partition = next(p for p in t.Partitions) + entity_name = partition.Source.EntityName + schema_name = partition.Source.SchemaName + if len(schema_name) == 0 or schema_name == "dbo": + schema_name = None + expr_name = partition.Source.ExpressionSource.Name + expr = tom.model.Expressions[expr_name].Expression + item_id = None + if "Sql.Database(" in expr: + matches = re.findall(r'"([^"]+)"', expr) + sql_endpoint_id = matches[1] + dfI_filt = dfI[dfI["Id"] == sql_endpoint_id] + item_name = ( + dfI_filt["Display Name"].iloc[0] if not dfI_filt.empty else None + ) + dfI_filt2 = dfI[ + (dfI["Display Name"] == item_name) + & (dfI["Type"].isin(["Lakehouse", "Warehouse"])) + ] + item_id = dfI_filt2["Id"].iloc[0] + item_type = dfI_filt2["Type"].iloc[0] + item_workspace_id = workspace_id + elif "AzureStorage.DataLake(" in expr: + match = re.search(r'AzureStorage\.DataLake\("([^"]+)"', expr) + if match: + url = match.group(1) + path_parts = urlparse(url).path.strip("/").split("/") + if len(path_parts) >= 2: + item_workspace_id, item_id = ( + path_parts[0], + path_parts[1], + ) + item_type = resolve_item_type( + item_id=item_id, workspace=workspace_id + ) + else: + raise NotImplementedError( + f"Direct Lake source '{expr}' is not supported. Please report this issue on GitHub (https://github.com/microsoft/semantic-link-labs/issues)." + ) + + if not item_id: + print( + f"{icons.info} Cannot determine the Direct Lake source of the '{table_name}' table." 
+ ) + elif item_type == "Warehouse": + print( + f"{icons.info} The '{table_name}' table references a warehouse. Warehouses are not yet supported for this method." + ) + else: + df_not_resident_cols = df_not_resident[ + df_not_resident["TableName"] == table_name + ] + col_dict = { + c.Name: c.SourceColumn + for c in t.Columns + if c.Type != TOM.ColumnType.RowNumber + and c.Name in df_not_resident_cols["ColumnName"].values + } + col_agg = _get_column_aggregate( + lakehouse=item_id, + workspace=item_workspace_id, + table_name=entity_name, + schema_name=schema_name, + column_name=list(col_dict.values()), + function="distinct", + ) + column_cardinalities = { + column_name: col_agg[source_column] + for column_name, source_column in col_dict.items() + if source_column in col_agg + } + + # Update the dax_model file with column cardinalities + tbl = next( + table + for table in dax_model.Tables + if str(table.TableName) == table_name + ) + # print( + # f"{icons.in_progress} Calculating column cardinalities for the '{table_name}' table..." + # ) + cols = [ + col + for col in tbl.Columns + if str(col.ColumnType) != "RowNumber" + and str(col.ColumnName) in column_cardinalities + ] + for col in cols: + # print(str(col.ColumnName), col.ColumnCardinality) + col.ColumnCardinality = column_cardinalities.get( + str(col.ColumnName) + ) + + VpaxTools.ExportVpax(vpax_stream, dax_model, vpa_model, tom_database) + + print(f"{icons.in_progress} Exporting .vpax file...") + + mode = FileMode.Create + file_stream = FileStream(path, mode, FileAccess.Write, FileShare.Read) + vpax_stream.CopyTo(file_stream) + file_stream.Close() + + print( + f"{icons.green_dot} The {file_path}.vpax file has been saved in the '{lakehouse_name}' lakehouse within the '{lakehouse_workspace_name}' workspace." + ) + + +def _dax_distinctcount(table_name, columns): + + dax = "EVALUATE\nROW(" + for c in columns: + full_name = f"'{table_name}'[{c}]" + dax += f"""\n"{c}", DISTINCTCOUNT({full_name}),""" + + return f"{dax.rstrip(',')}\n)" diff --git a/src/sempy_labs/_warehouses.py b/src/sempy_labs/_warehouses.py index 0e1f57d9..03b9a782 100644 --- a/src/sempy_labs/_warehouses.py +++ b/src/sempy_labs/_warehouses.py @@ -1,9 +1,9 @@ -import sempy.fabric as fabric from sempy_labs._helper_functions import ( resolve_workspace_name_and_id, _base_api, _create_dataframe, _update_dataframe_datatypes, + delete_item, ) import pandas as pd from typing import Optional @@ -16,7 +16,7 @@ def create_warehouse( description: Optional[str] = None, case_insensitive_collation: bool = False, workspace: Optional[str | UUID] = None, -): +) -> UUID: """ Creates a Fabric warehouse. @@ -34,6 +34,11 @@ def create_warehouse( The Fabric workspace name or ID. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. + + Returns + ------- + uuid.UUID + The ID of the created warehouse. """ (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) @@ -48,11 +53,11 @@ def create_warehouse( "defaultCollation" ] = "Latin1_General_100_CI_AS_KS_WS_SC_UTF8" - _base_api( + result = _base_api( request=f"/v1/workspaces/{workspace_id}/warehouses", payload=payload, method="post", - lro_return_status_code=True, + lro_return_json=True, status_codes=[201, 202], ) @@ -60,6 +65,8 @@ def create_warehouse( f"{icons.green_dot} The '{warehouse}' warehouse has been created within the '{workspace_name}' workspace." 
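
With the switch to lro_return_json, the create call can hand back the new item's ID; a hedged sketch with placeholder names:

from sempy_labs._warehouses import create_warehouse

warehouse_id = create_warehouse(warehouse="WH_Demo", workspace="Analytics")
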
) + return result.get("id") + def list_warehouses(workspace: Optional[str | UUID] = None) -> pd.DataFrame: """ @@ -67,6 +74,8 @@ def list_warehouses(workspace: Optional[str | UUID] = None) -> pd.DataFrame: This is a wrapper function for the following API: `Items - List Warehouses `_. + Service Principal Authentication is supported (see `here `_ for examples). + Parameters ---------- workspace : str | uuid.UUID, default=None @@ -93,7 +102,9 @@ def list_warehouses(workspace: Optional[str | UUID] = None) -> pd.DataFrame: (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) responses = _base_api( - request=f"/v1/workspaces/{workspace_id}/warehouses", uses_pagination=True + request=f"/v1/workspaces/{workspace_id}/warehouses", + uses_pagination=True, + client="fabric_sp", ) for r in responses: @@ -115,7 +126,7 @@ def list_warehouses(workspace: Optional[str | UUID] = None) -> pd.DataFrame: return df -def delete_warehouse(name: str, workspace: Optional[str | UUID] = None): +def delete_warehouse(name: str | UUID, workspace: Optional[str | UUID] = None): """ Deletes a Fabric warehouse. @@ -123,27 +134,15 @@ def delete_warehouse(name: str, workspace: Optional[str | UUID] = None): Parameters ---------- - name: str - Name of the warehouse. + name: str | uuid.UUID + Name or ID of the warehouse. workspace : str | uuid.UUID, default=None The Fabric workspace name or ID. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. """ - (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) - - item_id = fabric.resolve_item_id( - item_name=name, type="Warehouse", workspace=workspace_id - ) - - _base_api( - request=f"/v1/workspaces/{workspace_id}/warehouses/{item_id}", method="delete" - ) - - print( - f"{icons.green_dot} The '{name}' warehouse within the '{workspace_name}' workspace has been deleted." - ) + delete_item(item=name, type="Warehouse", workspace=workspace) def get_warehouse_tables( diff --git a/src/sempy_labs/_workloads.py b/src/sempy_labs/_workloads.py index 2f593c4f..b1b6247a 100644 --- a/src/sempy_labs/_workloads.py +++ b/src/sempy_labs/_workloads.py @@ -6,9 +6,10 @@ _base_api, _create_dataframe, ) +from uuid import UUID -def list_workloads(capacity_name: str) -> pd.DataFrame: +def list_workloads(capacity: str | UUID, **kwargs) -> pd.DataFrame: """ Returns the current state of the specified capacity workloads. If a workload is enabled, the percentage of maximum memory that the workload can consume is also returned. @@ -17,8 +18,8 @@ def list_workloads(capacity_name: str) -> pd.DataFrame: Parameters ---------- - capacity_name : str - The capacity name. + capacity : str | uuid.UUID + The capacity name or ID. Returns ------- @@ -28,6 +29,12 @@ def list_workloads(capacity_name: str) -> pd.DataFrame: from sempy_labs._helper_functions import resolve_capacity_id + if "capacity_name" in kwargs: + capacity = kwargs["capacity_name"] + print( + f"{icons.warning} The 'capacity_name' parameter is deprecated. Please use 'capacity' instead." 
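
A sketch of the renamed parameter; the capacity name is a placeholder, and the old capacity_name keyword still works but now prints a deprecation warning:

from sempy_labs._workloads import list_workloads

df = list_workloads(capacity="F64-Capacity")  # accepts a capacity name or ID
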
+ ) + columns = { "Workload Name": "string", "State": "string", @@ -35,7 +42,7 @@ def list_workloads(capacity_name: str) -> pd.DataFrame: } df = _create_dataframe(columns=columns) - capacity_id = resolve_capacity_id(capacity_name=capacity_name) + capacity_id = resolve_capacity_id(capacity=capacity) response = _base_api(request=f"/v1.0/myorg/capacities/{capacity_id}/Workloads") @@ -53,10 +60,11 @@ def list_workloads(capacity_name: str) -> pd.DataFrame: def patch_workload( - capacity_name: str, + capacity: str | UUID, workload_name: str, state: Optional[str] = None, max_memory_percentage: Optional[int] = None, + **kwargs, ): """ Changes the state of a specific workload to Enabled or Disabled. @@ -66,8 +74,8 @@ def patch_workload( Parameters ---------- - capacity_name : str - The capacity name. + capacity : str | uuid.UUID + The capacity name or ID. workload_name : str The workload name. state : str, default=None @@ -78,7 +86,13 @@ def patch_workload( from sempy_labs._helper_functions import resolve_capacity_id - capacity_id = resolve_capacity_id(capacity_name=capacity_name) + if "capacity_name" in kwargs: + capacity = kwargs["capacity_name"] + print( + f"{icons.warning} The 'capacity_name' parameter is deprecated. Please use 'capacity' instead." + ) + + capacity_id = resolve_capacity_id(capacity=capacity) states = ["Disabled", "Enabled", "Unsupported"] state = state.capitalize() @@ -119,5 +133,5 @@ def patch_workload( _base_api(request=url, method="patch", payload=payload) print( - f"The '{workload_name}' workload within the '{capacity_name}' capacity has been updated accordingly." + f"The '{workload_name}' workload within the '{capacity}' capacity has been updated accordingly." ) diff --git a/src/sempy_labs/_workspace_identity.py b/src/sempy_labs/_workspace_identity.py index 56418b96..63ca8ad0 100644 --- a/src/sempy_labs/_workspace_identity.py +++ b/src/sempy_labs/_workspace_identity.py @@ -13,6 +13,8 @@ def provision_workspace_identity(workspace: Optional[str | UUID] = None): This is a wrapper function for the following API: `Workspaces - Provision Identity `_. + Service Principal Authentication is supported (see `here `_ for examples). + Parameters ---------- workspace : str | uuid.UUID, default=None @@ -28,6 +30,7 @@ def provision_workspace_identity(workspace: Optional[str | UUID] = None): method="post", lro_return_status_code=True, status_codes=None, + client="fabric_sp", ) print( @@ -41,6 +44,8 @@ def deprovision_workspace_identity(workspace: Optional[str | UUID] = None): This is a wrapper function for the following API: `Workspaces - Derovision Identity `_. + Service Principal Authentication is supported (see `here `_ for examples). + Parameters ---------- workspace : str | uuid.UUID, default=None @@ -56,6 +61,7 @@ def deprovision_workspace_identity(workspace: Optional[str | UUID] = None): method="post", lro_return_status_code=True, status_codes=None, + client="fabric_sp", ) print( diff --git a/src/sempy_labs/_workspaces.py b/src/sempy_labs/_workspaces.py index 184f21e3..1b05ef50 100644 --- a/src/sempy_labs/_workspaces.py +++ b/src/sempy_labs/_workspaces.py @@ -18,6 +18,8 @@ def delete_user_from_workspace( This is a wrapper function for the following API: `Groups - Delete User In Group `_. + Service Principal Authentication is supported (see `here `_ for examples). 
+ Parameters ---------- email_address : str @@ -33,6 +35,7 @@ def delete_user_from_workspace( _base_api( request=f"/v1.0/myorg/groups/{workspace_id}/users/{email_address}", method="delete", + client="fabric_sp", ) print( f"{icons.green_dot} The '{email_address}' user has been removed from accessing the '{workspace_name}' workspace." @@ -50,6 +53,8 @@ def update_workspace_user( This is a wrapper function for the following API: `Groups - Update Group User `_. + Service Principal Authentication is supported (see `here `_ for examples). + Parameters ---------- email_address : str @@ -90,6 +95,7 @@ def update_workspace_user( request=f"/v1.0/myorg/groups/{workspace_id}/users", method="put", payload=payload, + client="fabric_sp", ) print( f"{icons.green_dot} The '{email_address}' user has been updated to a '{role_name}' within the '{workspace_name}' workspace." @@ -102,6 +108,8 @@ def list_workspace_users(workspace: Optional[str | UUID] = None) -> pd.DataFrame This is a wrapper function for the following API: `Workspaces - List Workspace Role Assignments `_. + Service Principal Authentication is supported (see `here `_ for examples). + Parameters ---------- workspace : str | uuid.UUID, default=None @@ -127,7 +135,9 @@ def list_workspace_users(workspace: Optional[str | UUID] = None) -> pd.DataFrame df = _create_dataframe(columns=columns) responses = _base_api( - request=f"v1/workspaces/{workspace_id}/roleAssignments", uses_pagination=True + request=f"v1/workspaces/{workspace_id}/roleAssignments", + uses_pagination=True, + client="fabric_sp", ) for r in responses: @@ -204,7 +214,9 @@ def add_user_to_workspace( def assign_workspace_to_capacity( - capacity_name: str, workspace: Optional[str | UUID] = None + capacity: str | UUID, + workspace: Optional[str | UUID] = None, + **kwargs, ): """ Assigns a workspace to a capacity. @@ -213,16 +225,22 @@ def assign_workspace_to_capacity( Parameters ---------- - capacity_name : str - The name of the capacity. + capacity : str | uuid.UUID + The name or ID of the capacity. workspace : str | uuid.UUID, default=None The name or ID of the Fabric workspace. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. """ + if "capacity_name" in kwargs: + capacity = kwargs["capacity_name"] + print( + f"{icons.warning} The 'capacity_name' parameter is deprecated. Please use 'capacity' instead." + ) + (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) - capacity_id = resolve_capacity_id(capacity_name=capacity_name) + capacity_id = resolve_capacity_id(capacity=capacity) payload = {"capacityId": capacity_id} @@ -233,7 +251,7 @@ def assign_workspace_to_capacity( status_codes=[200, 202], ) print( - f"{icons.green_dot} The '{workspace_name}' workspace has been assigned to the '{capacity_name}' capacity." + f"{icons.green_dot} The '{workspace_name}' workspace has been assigned to the '{capacity}' capacity." ) @@ -243,6 +261,8 @@ def unassign_workspace_from_capacity(workspace: Optional[str | UUID] = None): This is a wrapper function for the following API: `Workspaces - Unassign From Capacity `_. + Service Principal Authentication is supported (see `here `_ for examples). 
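
The same rename applies to workspace-to-capacity assignment; a hedged sketch with illustrative names:

from sempy_labs._workspaces import assign_workspace_to_capacity

assign_workspace_to_capacity(capacity="F64-Capacity", workspace="Analytics")
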
+ Parameters ---------- workspace : str | uuid.UUID, default=None @@ -257,6 +277,7 @@ def unassign_workspace_from_capacity(workspace: Optional[str | UUID] = None): request=f"/v1/workspaces/{workspace_id}/unassignFromCapacity", method="post", status_codes=[200, 202], + client="fabric_sp", ) print( f"{icons.green_dot} The '{workspace_name}' workspace has been unassigned from its capacity." @@ -271,6 +292,8 @@ def list_workspace_role_assignments( This is a wrapper function for the following API: `Workspaces - List Workspace Role Assignments `_. + Service Principal Authentication is supported (see `here `_ for examples). + Parameters ---------- workspace : str | uuid.UUID, default=None @@ -295,7 +318,9 @@ def list_workspace_role_assignments( df = _create_dataframe(columns=columns) responses = _base_api( - request=f"v1/workspaces/{workspace_id}/roleAssignments", uses_pagination=True + request=f"v1/workspaces/{workspace_id}/roleAssignments", + uses_pagination=True, + client="fabric_sp", ) for r in responses: @@ -310,3 +335,26 @@ def list_workspace_role_assignments( df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) return df + + +def delete_workspace(workspace: Optional[str | UUID] = None): + """ + Deletes a workspace. + + This is a wrapper function for the following API: `Workspaces - Delete Workspace `_. + + Parameters + ---------- + workspace : str | uuid.UUID, default=None + The Fabric workspace name or ID. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + """ + + (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) + + _base_api( + request=f"v1/workspaces/{workspace_id}", method="delete", client="fabric_sp" + ) + + print(f"{icons.green_dot} The '{workspace_name}' workspace has been deleted.") diff --git a/src/sempy_labs/admin/__init__.py b/src/sempy_labs/admin/__init__.py index a080bae8..9badd568 100644 --- a/src/sempy_labs/admin/__init__.py +++ b/src/sempy_labs/admin/__init__.py @@ -1,3 +1,18 @@ +from sempy_labs.admin._users import ( + list_access_entities, + list_user_subscriptions, +) +from sempy_labs.admin._workspaces import ( + add_user_to_workspace, + delete_user_from_workspace, + restore_deleted_workspace, +) +from sempy_labs.admin._artifacts import ( + list_unused_artifacts, +) +from sempy_labs.admin._shared import ( + list_widely_shared_artifacts, +) from sempy_labs.admin._datasets import ( list_datasets, list_dataset_users, @@ -9,6 +24,7 @@ from sempy_labs.admin._reports import ( list_reports, list_report_users, + list_report_subscriptions, ) from sempy_labs.admin._activities import ( list_activity_events, @@ -21,6 +37,18 @@ list_capacities, get_capacity_assignment_status, get_capacity_state, + list_capacity_users, + get_refreshables, +) +from sempy_labs.admin._tenant import ( + list_tenant_settings, + delete_capacity_tenant_setting_override, + update_tenant_setting, + update_capacity_tenant_setting_override, + list_workspaces_tenant_settings_overrides, + list_capacity_tenant_settings_overrides, + list_capacities_delegated_tenant_settings, + list_domain_tenant_settings_overrides, ) from sempy_labs.admin._basic_functions import ( assign_workspaces_to_capacity, @@ -28,9 +56,6 @@ list_workspaces, list_workspace_access_details, list_modified_workspaces, - list_tenant_settings, - list_capacities_delegated_tenant_settings, - list_access_entities, list_workspace_users, ) from sempy_labs.admin._domains import ( @@ -56,6 +81,14 @@ from 
sempy_labs.admin._git import ( list_git_connections, ) +from sempy_labs.admin._dataflows import ( + export_dataflow, +) +from sempy_labs.admin._tags import ( + list_tags, + create_tags, + delete_tag, +) __all__ = [ "list_items", @@ -94,4 +127,24 @@ "list_report_users", "patch_capacity", "list_workspace_users", + "list_widely_shared_artifacts", + "delete_capacity_tenant_setting_override", + "update_tenant_setting", + "update_capacity_tenant_setting_override", + "list_workspaces_tenant_settings_overrides", + "list_capacity_tenant_settings_overrides", + "list_capacities_delegated_tenant_settings", + "list_domain_tenant_settings_overrides", + "list_unused_artifacts", + "add_user_to_workspace", + "delete_user_from_workspace", + "restore_deleted_workspace", + "list_capacity_users", + "list_user_subscriptions", + "list_report_subscriptions", + "get_refreshables", + "export_dataflow", + "list_tags", + "create_tags", + "delete_tag", ] diff --git a/src/sempy_labs/admin/_apps.py b/src/sempy_labs/admin/_apps.py index 13905037..0f8ac7f1 100644 --- a/src/sempy_labs/admin/_apps.py +++ b/src/sempy_labs/admin/_apps.py @@ -40,7 +40,7 @@ def list_apps( "App Id": "string", "Description": "string", "Published By": "string", - "Last Update": "datetime", + "Last Update": "datetime_coerce", } df = _create_dataframe(columns=columns) diff --git a/src/sempy_labs/admin/_artifacts.py b/src/sempy_labs/admin/_artifacts.py new file mode 100644 index 00000000..ac033f1c --- /dev/null +++ b/src/sempy_labs/admin/_artifacts.py @@ -0,0 +1,62 @@ +import pandas as pd +from sempy_labs._helper_functions import ( + _base_api, +) +from uuid import UUID +from typing import Optional +from sempy_labs.admin._basic_functions import ( + _resolve_workspace_name_and_id, + _create_dataframe, + _update_dataframe_datatypes, +) + + +def list_unused_artifacts(workspace: Optional[str | UUID] = None) -> pd.DataFrame: + """ + Returns a list of datasets, reports, and dashboards that have not been used within 30 days for the specified workspace. + + This is a wrapper function for the following API: `Admin - Groups GetUnusedArtifactsAsAdmin `_. + + Service Principal Authentication is supported (see `here `_ for examples). + + Returns + ------- + pandas.DataFrame + A pandas dataframe showing a list of datasets, reports, and dashboards that have not been used within 30 days for the specified workspace. 
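
A hedged sketch of the new admin helper; the workspace name is a placeholder:

from sempy_labs.admin import list_unused_artifacts

# Datasets, reports, and dashboards not used in the last 30 days for the given workspace.
df = list_unused_artifacts(workspace="Analytics")
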
+ """ + + (workspace_name, workspace_id) = _resolve_workspace_name_and_id(workspace) + + columns = { + "Artifact Name": "string", + "Artifact Id": "string", + "Artifact Type": "string", + "Artifact Size in MB": "string", + "Created Date Time": "datetime", + "Last Accessed Date Time": "datetime", + } + + df = _create_dataframe(columns=columns) + + responses = _base_api( + request=f"/v1.0/myorg/admin/groups/{workspace_id}/unused", + client="fabric_sp", + uses_pagination=True, + ) + + for r in responses: + for i in r.get("unusedArtifactEntities", []): + new_data = { + "Artifact Name": i.get("displayName"), + "Artifact Id": i.get("artifactId"), + "Artifact Type": i.get("artifactType"), + "Artifact Size in MB": i.get("artifactSizeInMB"), + "Created Date Time": i.get("createdDateTime"), + "Last Accessed Date Time": i.get("lastAccessedDateTime"), + } + + df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + + _update_dataframe_datatypes(dataframe=df, column_map=columns) + + return df diff --git a/src/sempy_labs/admin/_basic_functions.py b/src/sempy_labs/admin/_basic_functions.py index 5e96d6fc..485cd500 100644 --- a/src/sempy_labs/admin/_basic_functions.py +++ b/src/sempy_labs/admin/_basic_functions.py @@ -249,133 +249,6 @@ def unassign_workspaces_from_capacity( ) -@log -def list_tenant_settings() -> pd.DataFrame: - """ - Lists all tenant settings. - - This is a wrapper function for the following API: `Tenants - List Tenant Settings `_. - - Service Principal Authentication is supported (see `here `_ for examples). - - Returns - ------- - pandas.DataFrame - A pandas dataframe showing the tenant settings. - """ - - columns = { - "Setting Name": "string", - "Title": "string", - "Enabled": "bool", - "Can Specify Security Groups": "bool", - "Tenant Setting Group": "string", - "Enabled Security Groups": "string", - } - df = _create_dataframe(columns=columns) - - response = _base_api(request="/v1/admin/tenantsettings", client="fabric_sp") - - for i in response.json().get("value", []): - new_data = { - "Setting Name": i.get("settingName"), - "Title": i.get("title"), - "Enabled": i.get("enabled"), - "Can Specify Security Groups": i.get("canSpecifySecurityGroups"), - "Tenant Setting Group": i.get("tenantSettingGroup"), - "Enabled Security Groups": [i.get("enabledSecurityGroups", [])], - } - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - - _update_dataframe_datatypes(dataframe=df, column_map=columns) - - return df - - -def list_capacities_delegated_tenant_settings( - return_dataframe: bool = True, -) -> pd.DataFrame | dict: - """ - Returns list of tenant setting overrides that override at the capacities. - - This is a wrapper function for the following API: `Tenants - List Capacities Tenant Settings Overrides `_. - - Service Principal Authentication is supported (see `here `_ for examples). - - Parameters - ---------- - return_dataframe : bool, default=True - If True, returns a dataframe. If False, returns a dictionary. - - Returns - ------- - pandas.DataFrame | dict - A pandas dataframe showing a list of tenant setting overrides that override at the capacities. 
- """ - - columns = { - "Capacity Id": "string", - "Setting Name": "string", - "Setting Title": "string", - "Setting Enabled": "bool", - "Can Specify Security Groups": "bool", - "Enabled Security Groups": "string", - "Tenant Setting Group": "string", - "Tenant Setting Properties": "string", - "Delegate to Workspace": "bool", - "Delegated From": "string", - } - df = _create_dataframe(columns=columns) - - responses = _base_api( - request="/v1/admin/capacities/delegatedTenantSettingOverrides", - client="fabric_sp", - uses_pagination=True, - ) - - if return_dataframe: - for r in responses: - for i in r.get("Overrides", []): - tenant_settings = i.get("tenantSettings", []) - for setting in tenant_settings: - new_data = { - "Capacity Id": i.get("id"), - "Setting Name": setting.get("settingName"), - "Setting Title": setting.get("title"), - "Setting Enabled": setting.get("enabled"), - "Can Specify Security Groups": setting.get( - "canSpecifySecurityGroups" - ), - "Enabled Security Groups": [ - setting.get("enabledSecurityGroups", []) - ], - "Tenant Setting Group": setting.get("tenantSettingGroup"), - "Tenant Setting Properties": [setting.get("properties", [])], - "Delegate to Workspace": setting.get("delegateToWorkspace"), - "Delegated From": setting.get("delegatedFrom"), - } - - df = pd.concat( - [df, pd.DataFrame(new_data, index=[0])], ignore_index=True - ) - - _update_dataframe_datatypes(dataframe=df, column_map=columns) - - return df - else: - combined_response = { - "overrides": [], - "continuationUri": "", - "continuationToken": "", - } - for r in responses: - combined_response["overrides"].extend(r["Overrides"]) - combined_response["continuationUri"] = r["continuationUri"] - combined_response["continuationToken"] = r["continuationToken"] - - return combined_response - - def list_modified_workspaces( modified_since: Optional[str] = None, exclude_inactive_workspaces: Optional[bool] = False, @@ -425,58 +298,6 @@ def list_modified_workspaces( return df -def list_access_entities( - user_email_address: str, -) -> pd.DataFrame: - """ - Shows a list of permission details for Fabric and Power BI items the specified user can access. - - This is a wrapper function for the following API: `Users - List Access Entities `_. - - Service Principal Authentication is supported (see `here `_ for examples). - - Parameters - ---------- - user_email_address : str - The user's email address. - - Returns - ------- - pandas.DataFrame - A pandas dataframe showing a list of permission details for Fabric and Power BI items the specified user can access. 
- """ - - columns = { - "Item Id": "string", - "Item Name": "string", - "Item Type": "string", - "Permissions": "string", - "Additional Permissions": "string", - } - df = _create_dataframe(columns=columns) - - responses = _base_api( - request=f"/v1/admin/users/{user_email_address}/access", - client="fabric_sp", - uses_pagination=True, - ) - - for r in responses: - for v in r.get("accessEntities", []): - new_data = { - "Item Id": v.get("id"), - "Item Name": v.get("displayName"), - "Item Type": v.get("itemAccessDetails", {}).get("type"), - "Permissions": v.get("itemAccessDetails", {}).get("permissions"), - "Additional Permissions": v.get("itemAccessDetails", {}).get( - "additionalPermissions" - ), - } - df = pd.concat([df, pd.DataFrame([new_data])], ignore_index=True) - - return df - - def list_workspace_access_details( workspace: Optional[Union[str, UUID]] = None, ) -> pd.DataFrame: @@ -529,13 +350,40 @@ def list_workspace_access_details( return df +def _resolve_workspace_name(workspace_id: Optional[UUID] = None) -> str: + from sempy_labs._helper_functions import _get_fabric_context_setting + from sempy.fabric.exceptions import FabricHTTPException + + if workspace_id is None: + workspace_id = _get_fabric_context_setting(name="trident.workspace.id") + + try: + workspace_name = ( + _base_api( + request=f"/v1/admin/workspaces/{workspace_id}", client="fabric_sp" + ) + .json() + .get("name") + ) + except FabricHTTPException: + raise ValueError( + f"{icons.red_dot} The '{workspace_id}' workspace was not found." + ) + return workspace_name + + def _resolve_workspace_name_and_id( workspace: str | UUID, ) -> Tuple[str, UUID]: + from sempy_labs._helper_functions import _get_fabric_context_setting + if workspace is None: - workspace_id = fabric.get_workspace_id() - workspace_name = fabric.resolve_workspace_name(workspace_id) + workspace_id = _get_fabric_context_setting(name="trident.workspace.id") + workspace_name = _resolve_workspace_name(workspace_id) + elif _is_valid_uuid(workspace): + workspace_id = workspace + workspace_name = _resolve_workspace_name(workspace_id) else: dfW = list_workspaces(workspace=workspace) if not dfW.empty: diff --git a/src/sempy_labs/admin/_capacities.py b/src/sempy_labs/admin/_capacities.py index 78980db5..d0cbb49f 100644 --- a/src/sempy_labs/admin/_capacities.py +++ b/src/sempy_labs/admin/_capacities.py @@ -5,6 +5,7 @@ from sempy._utils._log import log from sempy_labs._helper_functions import ( _base_api, + _build_url, _create_dataframe, _update_dataframe_datatypes, _is_valid_uuid, @@ -57,6 +58,24 @@ def _resolve_capacity_name_and_id( return capacity_name, capacity_id +def _resolve_capacity_id( + capacity: str | UUID, +) -> UUID: + + if _is_valid_uuid(capacity): + capacity_id = capacity + else: + dfC = list_capacities(capacity=capacity) + if dfC.empty: + raise ValueError( + f"{icons.red_dot} The '{capacity}' capacity was not found." + ) + + capacity_id = dfC["Capacity Id"].iloc[0] + + return capacity_id + + def _list_capacities_meta() -> pd.DataFrame: """ Shows the a list of capacities and their properties. This function is the admin version. @@ -221,7 +240,7 @@ def list_capacities( "Sku": "string", "Region": "string", "State": "string", - "Admins": "string", + "Admins": "list", } df = _create_dataframe(columns=columns) @@ -248,3 +267,205 @@ def list_capacities( df = df[df["Capacity Name"] == capacity] return df + + +def list_capacity_users(capacity: str | UUID) -> pd.DataFrame: + """ + Shows a list of users that have access to the specified workspace. 
+ + This is a wrapper function for the following API: `Admin - Capacities GetCapacityUsersAsAdmin `_. + + Service Principal Authentication is supported (see `here `_ for examples). + + Parameters + ---------- + capacity : str | uuid.UUID + The name or ID of the capacity. + + Returns + ------- + pandas.DataFrame + A pandas dataframe showing a list of users that have access to the specified workspace. + """ + + (capacity_name, capacity_id) = _resolve_capacity_name_and_id(capacity) + + columns = { + "User Name": "string", + "Email Address": "string", + "Capacity User Access Right": "string", + "Identifier": "string", + "Graph Id": "string", + "Principal Type": "string", + "User Type": "string", + "Profile": "string", + } + + df = _create_dataframe(columns=columns) + + response = _base_api( + request=f"/v1.0/myorg/admin/capacities/{capacity_id}/users", client="fabric_sp" + ) + + rows = [] + for v in response.json().get("value", []): + rows.append( + { + "User Name": v.get("displayName"), + "Email Address": v.get("emailAddress"), + "Capacity User Access Right": v.get("capacityUserAccessRight"), + "Identifier": v.get("identifier"), + "Graph Id": v.get("graphId"), + "Principal Type": v.get("principalType"), + "User Type": v.get("userType"), + "Profile": v.get("profile"), + } + ) + + if rows: + df = pd.DataFrame(rows, columns=list(columns.keys())) + + _update_dataframe_datatypes(dataframe=df, column_map=columns) + + return df + + +@log +def get_refreshables( + top: Optional[int] = None, + expand: Optional[str] = None, + filter: Optional[str] = None, + skip: Optional[int] = None, + capacity: Optional[str | UUID] = None, +) -> pd.DataFrame | dict: + """ + Returns a list of refreshables for the organization within a capacity. + + Power BI retains a seven-day refresh history for each dataset, up to a maximum of sixty refreshes. + + This is a wrapper function for the following API: `Admin - Get Refreshables `_. + + Service Principal Authentication is supported (see `here `_ for examples). + + Parameters + ---------- + top : int, default=None + Returns only the first n results. + expand : str, default=None + Accepts a comma-separated list of data types, which will be expanded inline in the response. Supports capacities and groups. + filter : str, default=None + Returns a subset of a results based on Odata filter query parameter condition. + skip : int, default=None + Skips the first n results. Use with top to fetch results beyond the first 1000. + capacity : str | uuid.UUID, default=None + The capacity name or ID to filter. If None, all capacities are returned. + + Returns + ------- + pandas.DataFrame + Returns a list of refreshables for the organization within a capacity. 
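# An illustrative call to the new list_capacity_users wrapper defined above. The
# capacity name and the "Admin" access-right value are placeholders.
from sempy_labs.admin._capacities import list_capacity_users

users = list_capacity_users(capacity="F64 Production")
capacity_admins = users[users["Capacity User Access Right"] == "Admin"]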
+ """ + + columns = { + "Workspace Id": "string", + "Workspace Name": "string", + "Item Id": "string", + "Item Name": "string", + "Item Kind": "string", + "Capacity Id": "string", + "Capacity Name": "string", + "Capacity SKU": "string", + "Refresh Count": "int", + "Refresh Failures": "int", + "Average Duration": "float", + "Median Duration": "float", + "Refreshes Per Day": "int", + "Refresh Type": "string", + "Start Time": "string", + "End Time": "string", + "Status": "string", + "Request Id": "string", + "Service Exception Json": "string", + "Extended Status": "dict", + "Refresh Attempts": "list", + "Refresh Schedule Days": "list", + "Refresh Schedule Times": "list", + "Refresh Schedule Enabled": "bool", + "Refresh Schedule Local Timezone Id": "string", + "Refresh Schedule Notify Option": "string", + "Configured By": "list", + } + + df = _create_dataframe(columns=columns) + + params = {} + url = ( + "/v1.0/myorg/admin/capacities/refreshables" + if capacity is None + else f"/v1.0/myorg/admin/capacities/{_resolve_capacity_id(capacity=capacity)}/refreshables" + ) + + if top is not None: + params["$top"] = top + + if expand is not None: + params["$expand"] = expand + + if filter is not None: + params["$filter"] = filter + + if skip is not None: + params["$skip"] = skip + + url = _build_url(url, params) + + responses = _base_api(request=url, client="fabric_sp") + + refreshables = [] + + for i in responses.json().get("value", []): + last_refresh = i.get("lastRefresh", {}) + refresh_schedule = i.get("refreshSchedule", {}) + new_data = { + "Workspace Id": i.get("group", {}).get("id"), + "Workspace Name": i.get("group", {}).get("name"), + "Item Id": i.get("id"), + "Item Name": i.get("name"), + "Item Kind": i.get("kind"), + "Capacity Id": ( + i.get("capacity", {}).get("id").lower() + if i.get("capacity", {}).get("id") + else None + ), + "Capacity Name": i.get("capacity", {}).get("displayName"), + "Capacity SKU": i.get("capacity", {}).get("sku"), + "Refresh Count": i.get("refreshCount", 0), + "Refresh Failures": i.get("refreshFailures", 0), + "Average Duration": i.get("averageDuration", 0), + "Median Duration": i.get("medianDuration", 0), + "Refreshes Per Day": i.get("refreshesPerDay", 0), + "Refresh Type": last_refresh.get("refreshType"), + "Start Time": last_refresh.get("startTime"), + "End Time": last_refresh.get("endTime"), + "Status": last_refresh.get("status"), + "Request Id": last_refresh.get("requestId"), + "Service Exception Json": last_refresh.get("serviceExceptionJson"), + "Extended Status": last_refresh.get("extendedStatus"), + "Refresh Attempts": last_refresh.get("refreshAttempts"), + "Refresh Schedule Days": refresh_schedule.get("days"), + "Refresh Schedule Times": refresh_schedule.get("times"), + "Refresh Schedule Enabled": refresh_schedule.get("enabled"), + "Refresh Schedule Local Timezone Id": refresh_schedule.get( + "localTimeZoneId" + ), + "Refresh Schedule Notify Option": refresh_schedule.get("notifyOption"), + "Configured By": i.get("configuredBy"), + } + + refreshables.append(new_data) + + if len(refreshables) > 0: + df = pd.DataFrame(refreshables) + _update_dataframe_datatypes(dataframe=df, column_map=columns) + + return df diff --git a/src/sempy_labs/admin/_dataflows.py b/src/sempy_labs/admin/_dataflows.py new file mode 100644 index 00000000..8d9e70d8 --- /dev/null +++ b/src/sempy_labs/admin/_dataflows.py @@ -0,0 +1,45 @@ +from typing import Optional +from sempy_labs._helper_functions import ( + _base_api, +) +from sempy_labs.admin._items import ( + _resolve_item_id, +) +from 
uuid import UUID +from sempy._utils._log import log + + +@log +def export_dataflow( + dataflow: str | UUID, + workspace: Optional[str | UUID] = None, +) -> dict: + """ + Shows a list of datasets for the organization. + + This is a wrapper function for the following API: `Admin - Dataflows ExportDataflowAsAdmin `_. + + Service Principal Authentication is supported (see `here `_ for examples). + + Parameters + ---------- + dataflow : str | UUID, default=None + The dataflow Name or Id. + workspace : str | uuid.UUID, default=None + The Fabric workspace name or id. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + Only used if given a dataflow name and not an id. + + Returns + ------- + dict + Exported Json file. + """ + dataflow_id = _resolve_item_id(item=dataflow, type="dataflow", workspace=workspace) + + url = f"/v1.0/myorg/admin/dataflows/{dataflow_id}/export" + + response = _base_api(request=url, client="fabric_sp") + + return response.json() diff --git a/src/sempy_labs/admin/_items.py b/src/sempy_labs/admin/_items.py index 8fafa4da..2ad2f8eb 100644 --- a/src/sempy_labs/admin/_items.py +++ b/src/sempy_labs/admin/_items.py @@ -17,20 +17,26 @@ def _resolve_item_id( - item_name: str, + item: str, type: Optional[str] = None, workspace: Optional[str | UUID] = None, ) -> UUID: + if _is_valid_uuid(item): + item_id = item - dfI = list_items(workspace=workspace, type=type) - dfI_filt = dfI[dfI["Item Name"] == item_name] + else: + workspace_id = _resolve_workspace_name_and_id(workspace)[1] + dfI = list_items(workspace=workspace_id, type=type) + dfI_filt = dfI[dfI["Item Name"] == item] - if len(dfI_filt) == 0: - raise ValueError( - f"The '{item_name}' {type} does not exist within the '{workspace}' workspace or is not of type '{type}'." - ) + if len(dfI_filt) == 0: + raise ValueError( + f"The '{item}' {type} does not exist within the '{workspace}' workspace or is not of type '{type}'." + ) + + item_id = dfI_filt["Item Id"].iloc[0] - return dfI_filt["Item Id"].iloc[0] + return item_id def _resolve_item_name_and_id( @@ -84,9 +90,8 @@ def list_items( capacity : str | uuid.UUID, default=None The capacity name or id. workspace : str | uuid.UUID, default=None - The Fabric workspace name. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. + The Fabric workspace name or id. + Defaults to None which looks into all the workspaces. state : str, default=None The item state. type : str, default=None diff --git a/src/sempy_labs/admin/_reports.py b/src/sempy_labs/admin/_reports.py index 5170695d..07d5782a 100644 --- a/src/sempy_labs/admin/_reports.py +++ b/src/sempy_labs/admin/_reports.py @@ -113,7 +113,7 @@ def list_report_users(report: str | UUID) -> pd.DataFrame: """ Shows a list of users that have access to the specified report. - This is a wrapper function for the following API: `Admin - Reports GetDatasetUsersAsAdmin `_. + This is a wrapper function for the following API: `Admin - Reports GetDatasetUsersAsAdmin `_. Service Principal Authentication is supported (see `here `_ for examples). @@ -163,3 +163,77 @@ def list_report_users(report: str | UUID) -> pd.DataFrame: _update_dataframe_datatypes(dataframe=df, column_map=columns) return df + + +def list_report_subscriptions(report: str | UUID) -> pd.DataFrame: + """ + Shows a list of report subscriptions along with subscriber details. This is a preview API call. 
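# A hypothetical use of the new export_dataflow admin wrapper; the dataflow and
# workspace names are placeholders.
from sempy_labs.admin._dataflows import export_dataflow

definition = export_dataflow(dataflow="Currency Rates", workspace="Finance")
# 'definition' holds the exported dataflow JSON returned as a dict.
print(list(definition.keys()))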
+ + This is a wrapper function for the following API: `Admin - Reports GetReportSubscriptionsAsAdmin `_. + + Service Principal Authentication is supported (see `here `_ for examples). + + Parameters + ---------- + report : str | uuid.UUID + The name or ID of the report. + + Returns + ------- + pandas.DataFrame + A pandas dataframe showing a list of report subscriptions along with subscriber details. This is a preview API call. + """ + + report_id = _resolve_report_id(report) + + columns = { + "Subscription Id": "string", + "Title": "string", + "Artifact Id": "string", + "Artifact Name": "string", + "Sub Artifact Name": "string", + "Artifact Type": "string", + "Is Enabled": "bool", + "Frequency": "string", + "Start Date": "datetime", + "End Date": "string", + "Link To Content": "bool", + "Preview Image": "bool", + "Attachment Format": "string", + "Users": "string", + } + + df = _create_dataframe(columns=columns) + + response = _base_api( + request=f"/v1.0/myorg/admin/reports/{report_id}/subscriptions", + client="fabric_sp", + ) + + rows = [] + for v in response.json().get("value", []): + rows.append( + { + "Subscription Id": v.get("id"), + "Title": v.get("title"), + "Artifact Id": v.get("artifactId"), + "Artifact Name": v.get("artifactDisplayName"), + "Sub Artifact Name": v.get("subArtifactDisplayName"), + "Artifact Type": v.get("artifactType"), + "Is Enabled": v.get("isEnabled"), + "Frequency": v.get("frequency"), + "Start Date": v.get("startDate"), + "End Date": v.get("endDate"), + "Link To Content": v.get("linkToContent"), + "Preview Image": v.get("previewImage"), + "Attachment Format": v.get("attachmentFormat"), + "Users": str(v.get("users")), + } + ) + + if rows: + df = pd.DataFrame(rows, columns=list(columns.keys())) + + _update_dataframe_datatypes(dataframe=df, column_map=columns) + + return df diff --git a/src/sempy_labs/admin/_scanner.py b/src/sempy_labs/admin/_scanner.py index 3bc56990..7ab6fb25 100644 --- a/src/sempy_labs/admin/_scanner.py +++ b/src/sempy_labs/admin/_scanner.py @@ -1,4 +1,3 @@ -import sempy.fabric as fabric from typing import Optional, List from uuid import UUID from sempy.fabric.exceptions import FabricHTTPException @@ -10,6 +9,7 @@ _base_api, _is_valid_uuid, _build_url, + resolve_workspace_name, ) @@ -54,7 +54,7 @@ def scan_workspaces( """ if workspace is None: - workspace = fabric.resolve_workspace_name() + workspace = resolve_workspace_name() if isinstance(workspace, str): workspace = [workspace] @@ -115,6 +115,4 @@ def scan_workspaces( client="fabric_sp", ) - print(f"{icons.green_dot} Status: {scan_status}") - return response.json() diff --git a/src/sempy_labs/admin/_shared.py b/src/sempy_labs/admin/_shared.py new file mode 100644 index 00000000..2987957c --- /dev/null +++ b/src/sempy_labs/admin/_shared.py @@ -0,0 +1,76 @@ +import pandas as pd +from sempy_labs._helper_functions import ( + _base_api, + _create_dataframe, +) + + +def list_widely_shared_artifacts( + api_name: str = "LinksSharedToWholeOrganization", +) -> pd.DataFrame: + """ + Returns a list of Power BI reports that are shared with the whole organization through links or a list of Power BI items (such as reports or dashboards) that are published to the web. + + This is a wrapper function for the following APIs: + `Admin - WidelySharedArtifacts LinksSharedToWholeOrganization `_. + `Admin - WidelySharedArtifacts PublishedToWeb `_. + + Service Principal Authentication is supported (see `here `_ for examples). 
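# A sketch of calling the new list_report_subscriptions wrapper; the report name is
# a placeholder.
from sempy_labs.admin._reports import list_report_subscriptions

subscriptions = list_report_subscriptions(report="Executive Scorecard")
enabled = subscriptions[subscriptions["Is Enabled"]]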
+ + Parameters + ---------- + api_name : str, default = "LinksSharedToWholeOrganization" + The name of the API to call. Either "LinksSharedToWholeOrganization" or "PublishedToWeb". + + Returns + ------- + pandas.DataFrame + A pandas dataframe showing a list of Power BI reports that are shared with the whole organization through links or a list of Power BI items (such as reports or dashboards) that are published to the web. + """ + + columns = { + "Artifact Id": "string", + "Artifact Name": "string", + "Artifact Type": "string", + "Access Right": "string", + "Share Type": "string", + "Sharer Name": "string", + "Sharer Email Address": "string", + "Sharer Identifier": "string", + "Sharer Graph Id": "string", + "Sharer Principal Type": "string", + } + + df = _create_dataframe(columns=columns) + + api = ( + "linksSharedToWholeOrganization" + if api_name == "LinksSharedToWholeOrganization" + else "publishedToWeb" + ) + + responses = _base_api( + request=f"/v1.0/myorg/admin/widelySharedArtifacts/{api}", + client="fabric_sp", + uses_pagination=True, + ) + + for r in responses: + for v in r.get("ArtifactAccessEntities", []): + sharer = v.get("sharer", {}) + new_data = { + "Artifact Id": v.get("artifactId"), + "Artifact Name": v.get("displayName"), + "Artifact Type": v.get("artifactType"), + "Access Right": v.get("accessRight"), + "Share Type": v.get("shareType"), + "Sharer Name": sharer.get("displayName"), + "Sharer Email Address": sharer.get("emailAddress"), + "Sharer Identifier": sharer.get("identifier"), + "Sharer Graph Id": sharer.get("graphId"), + "Sharer Principal Type": sharer.get("principalType"), + } + + df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + + return df diff --git a/src/sempy_labs/admin/_tags.py b/src/sempy_labs/admin/_tags.py new file mode 100644 index 00000000..c64847dd --- /dev/null +++ b/src/sempy_labs/admin/_tags.py @@ -0,0 +1,126 @@ +from sempy_labs._helper_functions import ( + _base_api, + _is_valid_uuid, +) +from uuid import UUID +from sempy_labs._tags import list_tags +import sempy_labs._icons as icons +from typing import List + + +def resolve_tag_id(tag: str | UUID): + + if _is_valid_uuid(tag): + tag_id = tag + else: + df = list_tags() + df[df["Tag Name"] == tag] + if df.empty: + raise ValueError(f"{icons.red_dot} The '{tag}' tag does not exist.") + tag_id = df.iloc[0]["Tag Id"] + + return tag_id + + +def create_tags(tags: str | List[str]): + """ + Creates a new tag or tags. + + This is a wrapper function for the following API: `Tags - Bulk Create Tags `_. + + Service Principal Authentication is supported (see `here `_ for examples). + + Parameters + ---------- + tags : str | List[str] + The name of the tag or tags to create. + """ + + if isinstance(tags, str): + tags = [tags] + + # Check the length of the tags + for tag in tags: + if len(tag) > 40: + raise ValueError( + f"{icons.red_dot} The '{tag}' tag name is too long. It must be 40 characters or less." 
+ ) + + # Check if the tags already exist + df = list_tags() + existing_names = df["Tag Name"].tolist() + existing_ids = df["Tag Id"].tolist() + + available_tags = [ + tag for tag in tags if tag not in existing_names and tag not in existing_ids + ] + unavailable_tags = [ + tag for tag in tags if tag in existing_names or tag in existing_ids + ] + + print(f"{icons.warning} The following tags already exist: {unavailable_tags}") + if not available_tags: + print(f"{icons.info} No new tags to create.") + return + + payload = [{"displayName": name} for name in available_tags] + + for tag in tags: + _base_api( + request="/v1/admin/bulkCreateTags", + client="fabric_sp", + method="post", + payload=payload, + status_codes=201, + ) + + print(f"{icons.green_dot} The '{available_tags}' tag(s) have been created.") + + +def delete_tag(tag: str | UUID): + """ + Deletes a tag. + + This is a wrapper function for the following API: `Tags - Delete Tag `_. + + Service Principal Authentication is supported (see `here `_ for examples). + + Parameters + ---------- + tag : str | uuid.UUID + The name or ID of the tag to delete. + """ + + tag_id = resolve_tag_id(tag) + + _base_api(request=f"/v1/admin/tags/{tag_id}", client="fabric_sp", method="delete") + + print(f"{icons.green_dot} The '{tag}' tag has been deleted.") + + +def update_tag(name: str, tag: str | UUID): + """ + Updates the name of a tag. + + This is a wrapper function for the following API: `Tags - Update Tag `_. + + Service Principal Authentication is supported (see `here `_ for examples). + + Parameters + ---------- + name : str + The new name of the tag. + tag : str | uuid.UUID + The name or ID of the tag to update. + """ + + tag_id = resolve_tag_id(tag) + + _base_api( + request=f"/v1/admin/tags/{tag_id}", + client="fabric_sp", + method="patch", + payload={"displayName": name}, + ) + + print(f"{icons.green_dot} The '{tag}' tag has been renamed to '{name}'.") diff --git a/src/sempy_labs/admin/_tenant.py b/src/sempy_labs/admin/_tenant.py new file mode 100644 index 00000000..e8415adf --- /dev/null +++ b/src/sempy_labs/admin/_tenant.py @@ -0,0 +1,494 @@ +from sempy_labs._helper_functions import ( + _update_dataframe_datatypes, + _base_api, + _create_dataframe, +) +from sempy._utils._log import log +import pandas as pd +from uuid import UUID +from sempy_labs.admin._capacities import _resolve_capacity_name_and_id +import sempy_labs._icons as icons +from typing import Optional, List + + +@log +def list_tenant_settings() -> pd.DataFrame: + """ + Lists all tenant settings. + + This is a wrapper function for the following API: `Tenants - List Tenant Settings `_. + + Service Principal Authentication is supported (see `here `_ for examples). + + Returns + ------- + pandas.DataFrame + A pandas dataframe showing the tenant settings. 
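# A hypothetical round trip through the tag helpers defined in _tags.py above; the
# tag names are placeholders.
from sempy_labs.admin._tags import create_tags, update_tag, delete_tag

create_tags(["Certified", "Finance"])            # bulk-create; each name must be 40 characters or fewer
update_tag(name="Finance (Certified)", tag="Finance")
delete_tag("Certified")                          # accepts a tag name or its UUID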
+ """ + + columns = { + "Setting Name": "string", + "Title": "string", + "Enabled": "bool", + "Can Specify Security Groups": "bool", + "Tenant Setting Group": "string", + "Enabled Security Groups": "list", + } + df = _create_dataframe(columns=columns) + + response = _base_api(request="/v1/admin/tenantsettings", client="fabric_sp") + + for i in response.json().get("value", []): + new_data = { + "Setting Name": i.get("settingName"), + "Title": i.get("title"), + "Enabled": i.get("enabled"), + "Can Specify Security Groups": i.get("canSpecifySecurityGroups"), + "Tenant Setting Group": i.get("tenantSettingGroup"), + "Enabled Security Groups": [i.get("enabledSecurityGroups", [])], + } + df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + + _update_dataframe_datatypes(dataframe=df, column_map=columns) + + return df + + +@log +def list_capacity_tenant_settings_overrides( + capacity: Optional[str | UUID] = None, + return_dataframe: bool = True, +) -> pd.DataFrame | dict: + """ + Returns list of tenant setting overrides that override at the capacities. + + This is a wrapper function for the following API: `Tenants - List Capacities Tenant Settings Overrides `_. + + Service Principal Authentication is supported (see `here `_ for examples). + + Parameters + ---------- + capacity : str | uuid.UUID, default=None + The capacity name or ID. + Defaults to None which resolves to showing all capacities. + return_dataframe : bool, default=True + If True, returns a dataframe. If False, returns a dictionary. + + Returns + ------- + pandas.DataFrame | dict + A pandas dataframe showing a list of tenant setting overrides that override at the capacities. + """ + + columns = { + "Capacity Id": "string", + "Setting Name": "string", + "Setting Title": "string", + "Setting Enabled": "bool", + "Can Specify Security Groups": "bool", + "Enabled Security Groups": "list", + "Tenant Setting Group": "string", + "Tenant Setting Properties": "list", + "Delegate to Workspace": "bool", + "Delegated From": "string", + } + + if capacity is None: + url = "/v1/admin/capacities/delegatedTenantSettingOverrides" + else: + (_, capacity_id) = _resolve_capacity_name_and_id(capacity=capacity) + url = f"/v1/admin/capacities/{capacity_id}/delegatedTenantSettingOverrides" + responses = _base_api( + request=url, + client="fabric_sp", + uses_pagination=True, + ) + + def create_new_data(setting, capacity_id=None): + return { + "Capacity Id": capacity_id or setting.get("id"), + "Setting Name": setting.get("settingName"), + "Setting Title": setting.get("title"), + "Setting Enabled": setting.get("enabled"), + "Can Specify Security Groups": setting.get("canSpecifySecurityGroups"), + "Enabled Security Groups": setting.get("enabledSecurityGroups", []), + "Tenant Setting Group": setting.get("tenantSettingGroup"), + "Tenant Setting Properties": setting.get("properties", []), + "Delegate to Workspace": setting.get("delegateToWorkspace"), + "Delegated From": setting.get("delegatedFrom"), + } + + def process_responses(responses, capacity_id=None, return_dataframe=False): + data = [] + df = _create_dataframe(columns=columns) + + for r in responses: + if capacity_id is None: + # If capacity_id is None, we access 'Overrides' -> 'tenantSettings' + for override in r.get("overrides", []): + capacity_id = override.get("id") + tenant_settings = override.get("tenantSettings", []) + for setting in tenant_settings: + data.append(create_new_data(setting, capacity_id)) + else: + # If capacity_id is provided, we access 'value' directly for 
tenantSettings + for setting in r.get("value", []): + data.append( + create_new_data(setting, capacity_id) + ) # Use provided capacity_id + + if return_dataframe: + if data: + df = pd.DataFrame(data) + _update_dataframe_datatypes(dataframe=df, column_map=columns) + return df + else: + key = "overrides" if capacity_id is None else "value" + continuation_uri = r.get("continuationUri", "") + continuation_token = r.get("continuationToken", "") + + return { + key: data, + "continuationUri": continuation_uri, + "continuationToken": continuation_token, + } + + # Main logic + if capacity is None: + return ( + process_responses(responses, return_dataframe=True) + if return_dataframe + else process_responses(responses) + ) + else: + return ( + process_responses(responses, capacity_id=capacity_id, return_dataframe=True) + if return_dataframe + else process_responses(responses, capacity_id=capacity_id) + ) + + +@log +def list_capacities_delegated_tenant_settings( + return_dataframe: bool = True, +) -> pd.DataFrame | dict: + """ + Returns list of tenant setting overrides that override at the capacities. + + NOTE: This function is to be deprecated. Please use the `list_capacity_tenant_settings_overrides` function instead. + + This is a wrapper function for the following API: `Tenants - List Capacities Tenant Settings Overrides `_. + + Service Principal Authentication is supported (see `here `_ for examples). + + Parameters + ---------- + return_dataframe : bool, default=True + If True, returns a dataframe. If False, returns a dictionary. + + Returns + ------- + pandas.DataFrame | dict + A pandas dataframe showing a list of tenant setting overrides that override at the capacities. + """ + + list_capacity_tenant_settings_overrides(return_dataframe=return_dataframe) + + +@log +def delete_capacity_tenant_setting_override(capacity: str | UUID, tenant_setting: str): + """ + Remove given tenant setting override for given capacity Id. + + This is a wrapper function for the following API: `Tenants - Delete Capacity Tenant Setting Override `_. + + Service Principal Authentication is supported (see `here `_ for examples). + + Parameters + ---------- + capacity : str | uuid.UUID + The capacity name or ID. + tenant_setting : str + The tenant setting name. Example: "TenantSettingForCapacityDelegatedSwitch" + """ + + (capacity_name, capacity_id) = _resolve_capacity_name_and_id(capacity=capacity) + + _base_api( + request=f"/v1/admin/capacities/{capacity_id}/delegatedTenantSettingOverrides/{tenant_setting}", + client="fabric_sp", + method="delete", + ) + + print( + f"{icons.green_dot} The '{tenant_setting}' tenant setting has been removed from the '{capacity_name}' capacity." + ) + + +@log +def update_tenant_setting( + tenant_setting: str, + enabled: bool, + delegate_to_capacity: Optional[bool] = None, + delegate_to_domain: Optional[bool] = None, + delegate_to_workspace: Optional[bool] = None, + enabled_security_groups: Optional[List[dict]] = None, + excluded_security_groups: Optional[List[dict]] = None, + properties: Optional[List[dict]] = None, +): + """ + Update a given tenant setting. + + This is a wrapper function for the following API: `Tenants - Update Tenant Setting `_. + + Service Principal Authentication is supported (see `here `_ for examples). + + Parameters + ---------- + tenant_setting : str + The tenant setting name. Example: "TenantSettingForCapacityDelegatedSwitch" + enabled : bool + The status of the tenant setting. False - Disabled, True - Enabled. 
+ delegate_to_capacity : bool, default=None + Indicates whether the tenant setting can be delegated to a capacity admin. False - Capacity admin cannot override the tenant setting. True - Capacity admin can override the tenant setting. + delegate_to_domain : bool, default=None + Indicates whether the tenant setting can be delegated to a domain admin. False - Domain admin cannot override the tenant setting. True - Domain admin can override the tenant setting. + delegate_to_workspace : bool, default=None + Indicates whether the tenant setting can be delegated to a workspace admin. False - Workspace admin cannot override the tenant setting. True - Workspace admin can override the tenant setting. + enabled_security_groups : List[dict], default=None + A list of enabled security groups. Example: + [ + { + "graphId": "f51b705f-a409-4d40-9197-c5d5f349e2f0", + "name": "TestComputeCdsa" + } + ] + excluded_security_groups : List[dict], default=None + A list of excluded security groups. Example: + [ + { + "graphId": "f51b705f-a409-4d40-9197-c5d5f349e2f0", + "name": "TestComputeCdsa" + } + ] + properties : List[dict], default=None + Tenant setting properties. Example: + [ + { + "name": "CreateP2w", + "value": "true", + "type": "Boolean" + } + ] + """ + + payload = {"enabled": enabled} + + if delegate_to_capacity is not None: + payload["delegateToCapacity"] = delegate_to_capacity + if delegate_to_domain is not None: + payload["delegateToDomain"] = delegate_to_domain + if delegate_to_workspace is not None: + payload["delegateToWorkspace"] = delegate_to_workspace + if enabled_security_groups is not None: + payload["enabledSecurityGroups"] = enabled_security_groups + if excluded_security_groups is not None: + payload["excludedSecurityGroups"] = excluded_security_groups + if properties is not None: + payload["properties"] = properties + + _base_api( + request=f"/v1/admin/tenantsettings/{tenant_setting}/update", + client="fabric_sp", + method="post", + payload=payload, + ) + + print(f"{icons.green_dot} The '{tenant_setting}' tenant setting has been updated.") + + +@log +def update_capacity_tenant_setting_override( + capacity: str | UUID, + tenant_setting: str, + enabled: bool, + delegate_to_workspace: Optional[bool] = None, + enabled_security_groups: Optional[List[dict]] = None, + excluded_security_groups: Optional[List[dict]] = None, +): + """ + Update given tenant setting override for given capacity. + + This is a wrapper function for the following API: `Tenants - Update Capacity Tenant Setting Override `_. + + Service Principal Authentication is supported (see `here `_ for examples). + + Parameters + ---------- + capacity : str | uuid.UUID + The capacity name or ID. + tenant_setting : str + The tenant setting name. Example: "TenantSettingForCapacityDelegatedSwitch" + enabled : bool + The status of the tenant setting. False - Disabled, True - Enabled. + delegate_to_workspace : bool, default=None + Indicates whether the tenant setting can be delegated to a workspace admin. False - Workspace admin cannot override the tenant setting. True - Workspace admin can override the tenant setting. + enabled_security_groups : List[dict], default=None + A list of enabled security groups. Example: + [ + { + "graphId": "f51b705f-a409-4d40-9197-c5d5f349e2f0", + "name": "TestComputeCdsa" + } + ] + excluded_security_groups : List[dict], default=None + A list of excluded security groups. 
Example: + [ + { + "graphId": "f51b705f-a409-4d40-9197-c5d5f349e2f0", + "name": "TestComputeCdsa" + } + ] + """ + + (capacity_name, capacity_id) = _resolve_capacity_name_and_id(capacity=capacity) + + payload = {"enabled": enabled} + + if delegate_to_workspace is not None: + payload["delegateToWorkspace"] = delegate_to_workspace + if enabled_security_groups is not None: + payload["enabledSecurityGroups"] = enabled_security_groups + if excluded_security_groups is not None: + payload["excludedSecurityGroups"] = excluded_security_groups + + _base_api( + request=f"/v1/admin/capacities/{capacity_id}/delegatedTenantSettingOverrides/{tenant_setting}/update", + client="fabric_sp", + method="post", + payload=payload, + ) + + print( + f"{icons.green_dot} The '{tenant_setting}' tenant setting for the '{capacity_name}' capacity has been updated." + ) + + +@log +def list_workspaces_tenant_settings_overrides() -> pd.DataFrame: + """ + Shows a list of workspace delegation setting overrides. In order to run this function, you must enable the workspace's delegated OneLake settings. To do this, navigate to the workspace, Workspace Settings -> Delegated Settings -> OneLake settings -> Set to 'On'. + + This is a wrapper function for the following API: `Tenants - List Workspaces Tenant Settings Overrides `_. + + Service Principal Authentication is supported (see `here `_ for examples). + + Returns + ------- + pandas.DataFrame + A pandas dataframe showing a list of workspace delegation setting overrides. + """ + + columns = { + "Workspace Id": "string", + "Setting Name": "string", + "Title": "string", + "Enabled": "bool", + "Can Specify Security Groups": "bool", + "Enabled Security Groups": "list", + "Tenant Setting Group": "string", + "Delegated From": "string", + } + df = _create_dataframe(columns=columns) + + responses = _base_api( + request="/v1/admin/workspaces/delegatedTenantSettingOverrides", + client="fabric_sp", + uses_pagination=True, + ) + + for r in responses: + for v in r.get("value", []): + workspace_id = v.get("id") + for setting in v.get("tenantSettings", []): + new_data = { + "Workspace Id": workspace_id, + "Setting Name": setting.get("settingName"), + "Title": setting.get("title"), + "Enabled": setting.get("enabled"), + "Can Specify Security Groups": setting.get( + "canSpecifySecurityGroups" + ), + "Enabled Security Groups": [ + setting.get("enabledSecurityGroups", []) + ], + "Tenant Setting Group": setting.get("tenantSettingGroup"), + "Delegated From": setting.get("delegatedFrom"), + } + + df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + + _update_dataframe_datatypes(dataframe=df, column_map=columns) + + return df + + +@log +def list_domain_tenant_settings_overrides() -> pd.DataFrame: + """ + Shows a list of domain delegation setting overrides. + + This is a wrapper function for the following API: `Tenants - List Domains Tenant Settings Overrides `_. + + Service Principal Authentication is supported (see `here `_ for examples). + + Returns + ------- + pandas.DataFrame + A pandas dataframe showing a list of domain delegation setting overrides. 
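# A hypothetical capacity-level override update followed by a check of the result. The
# capacity name is a placeholder; the setting name and security group are taken from
# the docstring examples above.
from sempy_labs.admin._tenant import (
    list_capacity_tenant_settings_overrides,
    update_capacity_tenant_setting_override,
)

update_capacity_tenant_setting_override(
    capacity="F64 Production",
    tenant_setting="TenantSettingForCapacityDelegatedSwitch",
    enabled=True,
    enabled_security_groups=[
        {"graphId": "f51b705f-a409-4d40-9197-c5d5f349e2f0", "name": "TestComputeCdsa"}
    ],
)
overrides = list_capacity_tenant_settings_overrides(capacity="F64 Production")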
+ """ + + columns = { + "Domain Id": "string", + "Setting Name": "string", + "Title": "string", + "Enabled": "bool", + "Can Specify Security Groups": "bool", + "Enabled Security Groups": "list", + "Tenant Setting Group": "string", + "Delegated To Workspace": "bool", + "Delegated From": "string", + } + df = _create_dataframe(columns=columns) + + responses = _base_api( + request="/v1/admin/domains/delegatedTenantSettingOverrides", + client="fabric_sp", + uses_pagination=True, + ) + + for r in responses: + for v in r.get("value", []): + domain_id = v.get("id") + for setting in v.get("tenantSettings", []): + new_data = { + "Domain Id": domain_id, + "Setting Name": setting.get("settingName"), + "Title": setting.get("title"), + "Enabled": setting.get("enabled"), + "Can Specify Security Groups": setting.get( + "canSpecifySecurityGroups" + ), + "Enabled Security Groups": [ + setting.get("enabledSecurityGroups", []) + ], + "Tenant Setting Group": setting.get("tenantSettingGroup"), + "Delegated To Workspace": setting.get("delegateToWorkspace"), + "Delegated From": setting.get("delegatedFrom"), + } + + df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + + _update_dataframe_datatypes(dataframe=df, column_map=columns) + + return df diff --git a/src/sempy_labs/admin/_users.py b/src/sempy_labs/admin/_users.py new file mode 100644 index 00000000..6b37718c --- /dev/null +++ b/src/sempy_labs/admin/_users.py @@ -0,0 +1,133 @@ +from sempy_labs._helper_functions import ( + _base_api, + _create_dataframe, + _update_dataframe_datatypes, +) +from uuid import UUID +import pandas as pd + + +def list_access_entities( + user_email_address: str, +) -> pd.DataFrame: + """ + Shows a list of permission details for Fabric and Power BI items the specified user can access. + + This is a wrapper function for the following API: `Users - List Access Entities `_. + + Service Principal Authentication is supported (see `here `_ for examples). + + Parameters + ---------- + user_email_address : str + The user's email address. + + Returns + ------- + pandas.DataFrame + A pandas dataframe showing a list of permission details for Fabric and Power BI items the specified user can access. + """ + + columns = { + "Item Id": "string", + "Item Name": "string", + "Item Type": "string", + "Permissions": "string", + "Additional Permissions": "string", + } + df = _create_dataframe(columns=columns) + + responses = _base_api( + request=f"/v1/admin/users/{user_email_address}/access", + client="fabric_sp", + uses_pagination=True, + ) + + for r in responses: + for v in r.get("accessEntities", []): + new_data = { + "Item Id": v.get("id"), + "Item Name": v.get("displayName"), + "Item Type": v.get("itemAccessDetails", {}).get("type"), + "Permissions": v.get("itemAccessDetails", {}).get("permissions"), + "Additional Permissions": v.get("itemAccessDetails", {}).get( + "additionalPermissions" + ), + } + df = pd.concat([df, pd.DataFrame([new_data])], ignore_index=True) + + return df + + +def list_user_subscriptions(user: str | UUID) -> pd.DataFrame: + """ + Shows a list of subscriptions for the specified user. This is a preview API call. + + This is a wrapper function for the following API: `Admin - Users GetUserSubscriptionsAsAdmin `_. + + Service Principal Authentication is supported (see `here `_ for examples). + + Parameters + ---------- + user : str | uuid.UUID + The graph ID or user principal name (UPN) of the user. 
+ + Returns + ------- + pandas.DataFrame + A pandas dataframe showing a list of subscriptions for the specified user. This is a preview API call. + """ + + columns = { + "Subscription Id": "string", + "Title": "string", + "Artifact Id": "string", + "Artifact Name": "string", + "Sub Artifact Name": "string", + "Artifact Type": "string", + "Is Enabled": "bool", + "Frequency": "string", + "Start Date": "datetime", + "End Date": "string", + "Link To Content": "bool", + "Preview Image": "bool", + "Attachment Format": "string", + "Users": "string", + } + + df = _create_dataframe(columns=columns) + + responses = _base_api( + request=f"/v1.0/myorg/admin/users/{user}/subscriptions", + client="fabric_sp", + uses_pagination=True, + ) + + rows = [] + for r in responses: + for v in r.get("subscriptionEntities", []): + rows.append( + { + "Subscription Id": v.get("id"), + "Title": v.get("title"), + "Artifact Id": v.get("artifactId"), + "Artifact Name": v.get("artifactDisplayName"), + "Sub Artifact Name": v.get("subArtifactDisplayName"), + "Artifact Type": v.get("artifactType"), + "Is Enabled": v.get("isEnabled"), + "Frequency": v.get("frequency"), + "Start Date": v.get("startDate"), + "End Date": v.get("endDate"), + "Link To Content": v.get("linkToContent"), + "Preview Image": v.get("previewImage"), + "Attachment Format": v.get("attachmentFormat"), + "Users": str(v.get("users")), + } + ) + + if rows: + df = pd.DataFrame(rows, columns=list(columns.keys())) + + _update_dataframe_datatypes(dataframe=df, column_map=columns) + + return df diff --git a/src/sempy_labs/admin/_workspaces.py b/src/sempy_labs/admin/_workspaces.py new file mode 100644 index 00000000..65c89c53 --- /dev/null +++ b/src/sempy_labs/admin/_workspaces.py @@ -0,0 +1,148 @@ +from sempy_labs._helper_functions import ( + _base_api, + _build_url, + _encode_user, +) +from uuid import UUID +from typing import Optional +from sempy_labs.admin._basic_functions import ( + _resolve_workspace_name_and_id, +) +import sempy_labs._icons as icons + + +def add_user_to_workspace( + user: str | UUID, + role: str = "Member", + principal_type: str = "User", + workspace: Optional[str | UUID] = None, +): + """ + Grants user permissions to the specified workspace. + + This is a wrapper function for the following API: `Admin - Groups AddUserAsAdmin `_. + + Parameters + ---------- + user : str | uuid.UUID + The user identifier or email address. For service principals and groups you must use the user identifier. + role : str, default="Member" + The role of the user in the workspace. Options are: 'Admin', 'Contributor', 'Member', 'None', 'Viewer'. + principal_type : str, default="User" + The principal type of the user. Options are: 'App', 'Group', 'None', 'User'. + workspace : str | uuid.UUID, default=None + The Fabric workspace name or ID. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + """ + + (workspace_name, workspace_id) = _resolve_workspace_name_and_id(workspace) + + # Validation + role = role.capitalize() + roles = ["Admin", "Contributor", "Member", "None", "Viewer"] + if role not in roles: + raise ValueError(f"{icons.red_dot} Invalid role. Please choose from {roles}") + principal_types = ["App", "Group", "None", "User"] + if principal_type not in principal_types: + raise ValueError( + f"{icons.red_dot} Invalid principal type. 
Please choose from {principal_types}" + ) + + user = _encode_user(user) + + payload = { + "identifier": user, # identifier or emailAddress? + "principalType": principal_type, + "groupUserAccessRight": role, + } + + _base_api( + request=f"/v1.0/myorg/admin/groups/{workspace_id}/users", + method="post", + payload=payload, + ) + + print( + f"{icons.green_dot} The '{user}' user has been added with '{role.lower()}' permissions to the '{workspace_name}' workspace." + ) + + +def delete_user_from_workspace( + user: str | UUID, + workspace: Optional[str | UUID] = None, + is_group: Optional[bool] = None, + profile_id: Optional[str] = None, +): + """ + Removes user permissions from the specified workspace. + + This is a wrapper function for the following API: `Admin - Groups DeleteUserAsAdmin `_. + + Parameters + ---------- + user : str | uuid.UUID + The user identifier or email address. For service principals and groups you must use the user identifier. + workspace : str | uuid.UUID, default=None + The Fabric workspace name or ID. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + is_group : bool, default=None + Whether a given user is a group or not. This parameter is required when user to delete is group. + profile_id : str, default=None + The service principal profile ID to delete. + """ + + (workspace_name, workspace_id) = _resolve_workspace_name_and_id(workspace) + + user = _encode_user(user) + url = f"/v1.0/myorg/admin/groups/{workspace_id}/users/{user}" + + params = {} + if profile_id is not None: + params["profileId"] = profile_id + if is_group is not None: + params["isGroup"] = is_group + + url = _build_url(url, params) + + _base_api( + request=url, + method="delete", + ) + + print( + f"{icons.green_dot} The '{user}' user has been removed from the '{workspace_name}' workspace." + ) + + +def restore_deleted_workspace(workspace_id: UUID, name: str, email_address: str): + """ + Restores a deleted workspace. + + This is a wrapper function for the following API: `Admin - Groups RestoreDeletedGroupAsAdmin `_. + + Parameters + ---------- + workspace_id : uuid.UUID + The ID of the workspace to restore. + name : str + The name of the group to be restored + email_address : str + The email address of the owner of the group to be restored + """ + + payload = { + "name": name, + "emailAddress": email_address, + } + + _base_api( + request=f"/v1.0/myorg/admin/groups/{workspace_id}/restore", + method="post", + payload=payload, + ) + + print( + f"{icons.green_dot} The '{workspace_id}' workspace has been restored as '{name}'." + ) diff --git a/src/sempy_labs/directlake/_directlake_schema_compare.py b/src/sempy_labs/directlake/_directlake_schema_compare.py index 02b3ce20..2b6b7f3f 100644 --- a/src/sempy_labs/directlake/_directlake_schema_compare.py +++ b/src/sempy_labs/directlake/_directlake_schema_compare.py @@ -4,6 +4,7 @@ format_dax_object_name, resolve_workspace_name_and_id, resolve_dataset_name_and_id, + resolve_workspace_name, ) from IPython.display import display from sempy_labs.lakehouse import get_lakehouse_columns @@ -70,7 +71,7 @@ def direct_lake_schema_compare( f"{icons.red_dot} This function only supports Direct Lake semantic models where the source lakehouse resides in the same workpace as the semantic model." 
) - lakehouse_workspace = fabric.resolve_workspace_name(lakehouse_workspace_id) + lakehouse_workspace = resolve_workspace_name(workspace_id=lakehouse_workspace_id) dfT = fabric.list_tables(dataset=dataset_id, workspace=workspace_id) dfC = fabric.list_columns(dataset=dataset_id, workspace=workspace_id) lc = get_lakehouse_columns(lakehouse_name, lakehouse_workspace) diff --git a/src/sempy_labs/directlake/_directlake_schema_sync.py b/src/sempy_labs/directlake/_directlake_schema_sync.py index 2934fdee..5d13d0f4 100644 --- a/src/sempy_labs/directlake/_directlake_schema_sync.py +++ b/src/sempy_labs/directlake/_directlake_schema_sync.py @@ -1,5 +1,5 @@ import sempy -import sempy.fabric as fabric +import pandas as pd from sempy_labs.lakehouse import get_lakehouse_columns from sempy_labs.directlake._dl_helper import get_direct_lake_source from sempy_labs.tom import connect_semantic_model @@ -19,8 +19,8 @@ def direct_lake_schema_sync( dataset: str | UUID, workspace: Optional[str | UUID] = None, add_to_model: bool = False, - **kwargs, -): + remove_from_model: bool = False, +) -> pd.DataFrame: """ Shows/adds columns which exist in the lakehouse but do not exist in the semantic model (only for tables in the semantic model). @@ -34,22 +34,18 @@ def direct_lake_schema_sync( or if no lakehouse attached, resolves to the workspace of the notebook. add_to_model : bool, default=False If set to True, columns which exist in the lakehouse but do not exist in the semantic model are added to the semantic model. No new tables are added. + remove_from_model : bool, default=False + If set to True, columns which exist in the semantic model but do not exist in the lakehouse are removed from the semantic model. No new tables are removed. + + Returns + ------- + pandas.DataFrame + A pandas dataframe showing the status of columns in the semantic model and lakehouse (prior to adding/removing them from the model using this function). """ sempy.fabric._client._utils._init_analysis_services() import Microsoft.AnalysisServices.Tabular as TOM - if "lakehouse" in kwargs: - print( - "The 'lakehouse' parameter has been deprecated as it is no longer necessary. Please remove this parameter from the function going forward." - ) - del kwargs["lakehouse"] - if "lakehouse_workspace" in kwargs: - print( - "The 'lakehouse_workspace' parameter has been deprecated as it is no longer necessary. Please remove this parameter from the function going forward." - ) - del kwargs["lakehouse_workspace"] - (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) (dataset_name, dataset_id) = resolve_dataset_name_and_id(dataset, workspace_id) @@ -67,14 +63,54 @@ def direct_lake_schema_sync( f"{icons.red_dot} This function only supports Direct Lake semantic models where the source lakehouse resides in the same workpace as the semantic model." 
) - lakehouse_workspace = fabric.resolve_workspace_name(lakehouse_workspace_id) + lc = get_lakehouse_columns(lakehouse_id, lakehouse_workspace_id) - lc = get_lakehouse_columns(lakehouse_name, lakehouse_workspace) + readonly = True + if add_to_model or remove_from_model: + readonly = False + df = pd.DataFrame( + columns=[ + "TableName", + "ColumnName", + "SourceTableName", + "SourceColumnName", + "Status", + ] + ) with connect_semantic_model( - dataset=dataset_id, readonly=False, workspace=workspace_id + dataset=dataset_id, readonly=readonly, workspace=workspace_id ) as tom: + # Check if the columns in the semantic model exist in the lakehouse + for c in tom.all_columns(): + partition_name = next(p.Name for p in c.Table.Partitions) + p = c.Table.Partitions[partition_name] + if p.SourceType == TOM.PartitionSourceType.Entity: + entity_name = p.Source.EntityName + source_column = c.SourceColumn + lc_filt = lc[ + (lc["Table Name"] == entity_name) + & (lc["Column Name"] == source_column) + ] + # Remove column from model if it doesn't exist in the lakehouse + if lc_filt.empty: + new_data = { + "TableName": c.Parent.Name, + "ColumnName": c.Name, + "SourceTableName": entity_name, + "SourceColumnName": source_column, + "Status": "Not in lakehouse", + } + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + if remove_from_model: + tom.remove_object(object=c) + print( + f"{icons.green_dot} The '{c.Parent.Name}'[{c.Name}] column has been removed from the '{dataset_name}' semantic model within the '{workspace_name}' workspace." + ) + # Check if the lakehouse columns exist in the semantic model for i, r in lc.iterrows(): lakeTName = r["Table Name"] lakeCName = r["Column Name"] @@ -97,9 +133,17 @@ def direct_lake_schema_sync( c.SourceColumn == lakeCName and c.Parent.Name == table_name for c in tom.all_columns() ): - print( - f"{icons.yellow_dot} The '{lakeCName}' column exists in the '{lakeTName}' lakehouse table but not in the '{dataset_name}' semantic model within the '{workspace_name}' workspace." + new_data = { + "TableName": table_name, + "ColumnName": None, + "SourceTableName": lakeTName, + "SourceColumnName": lakeCName, + "Status": "Not in semantic model", + } + df = pd.concat( + [df, pd.DataFrame(new_data, index=[0])], ignore_index=True ) + if add_to_model: dt = _convert_data_type(dType) tom.add_data_column( @@ -111,3 +155,5 @@ def direct_lake_schema_sync( print( f"{icons.green_dot} The '{lakeCName}' column in the '{lakeTName}' lakehouse table was added to the '{dataset_name}' semantic model within the '{workspace_name}' workspace." 
) + + return df diff --git a/src/sempy_labs/directlake/_dl_helper.py b/src/sempy_labs/directlake/_dl_helper.py index bc33e72e..f8d96817 100644 --- a/src/sempy_labs/directlake/_dl_helper.py +++ b/src/sempy_labs/directlake/_dl_helper.py @@ -7,7 +7,6 @@ from sempy._utils._log import log from sempy_labs._helper_functions import ( retry, - resolve_lakehouse_name, _convert_data_type, resolve_dataset_name_and_id, resolve_workspace_name_and_id, @@ -129,11 +128,6 @@ def generate_direct_lake_semantic_model( (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) if lakehouse_workspace is None: lakehouse_workspace = workspace - if lakehouse is None: - lakehouse_id = fabric.get_lakehouse_id() - lakehouse_workspace_id = fabric.get_workspace_id() - lakehouse_workspace = fabric.resolve_workspace_name(lakehouse_workspace_id) - lakehouse = resolve_lakehouse_name(lakehouse_id, lakehouse_workspace) dfLT = get_lakehouse_tables(lakehouse=lakehouse, workspace=lakehouse_workspace) diff --git a/src/sempy_labs/directlake/_generate_shared_expression.py b/src/sempy_labs/directlake/_generate_shared_expression.py index 4db7eabc..13752ef3 100644 --- a/src/sempy_labs/directlake/_generate_shared_expression.py +++ b/src/sempy_labs/directlake/_generate_shared_expression.py @@ -1,10 +1,9 @@ -import sempy.fabric as fabric from sempy_labs._helper_functions import ( - resolve_lakehouse_name, - resolve_lakehouse_id, - resolve_warehouse_id, resolve_workspace_name_and_id, _base_api, + resolve_lakehouse_name_and_id, + resolve_item_name_and_id, + _get_fabric_context_setting, ) from typing import Optional import sempy_labs._icons as icons @@ -15,6 +14,7 @@ def generate_shared_expression( item_name: Optional[str] = None, item_type: str = "Lakehouse", workspace: Optional[str | UUID] = None, + use_sql_endpoint: bool = True, ) -> str: """ Dynamically generates the M expression used by a Direct Lake model for a given lakehouse/warehouse. @@ -30,6 +30,9 @@ def generate_shared_expression( The Fabric workspace name or ID used by the item. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. + use_sql_endpoint : bool, default=True + Whether to use the SQL Endpoint for the lakehouse/warehouse. + If False, the expression will be generated without using the SQL Endpoint. Returns ------- @@ -45,38 +48,45 @@ def generate_shared_expression( f"{icons.red_dot} Invalid item type. Valid options: {item_types}." 
) - if item_name is None: - item_id = fabric.get_lakehouse_id() - item_name = resolve_lakehouse_name(item_id, workspace_id) - elif item_name is not None and item_type == "Lakehouse": - item_id = resolve_lakehouse_id(lakehouse=item_name, workspace=workspace_id) - elif item_type == "Warehouse": - item_id = resolve_warehouse_id(warehouse=item_name, workspace=workspace_id) + if item_type == "Lakehouse": + (item_name, item_id) = resolve_lakehouse_name_and_id( + lakehouse=item_name, workspace=workspace_id + ) + else: + (item_name, item_id) = resolve_item_name_and_id( + item=item_name, type=item_type, workspace=workspace_id + ) - item_type_rest = f"{item_type.lower()}s" - response = _base_api( - request=f"/v1/workspaces/{workspace_id}/{item_type_rest}/{item_id}" - ) + if use_sql_endpoint: + item_type_rest = f"{item_type.lower()}s" + response = _base_api( + request=f"/v1/workspaces/{workspace_id}/{item_type_rest}/{item_id}" + ) - prop = response.json().get("properties") + prop = response.json().get("properties") - if item_type == "Lakehouse": - sqlprop = prop.get("sqlEndpointProperties") - sqlEPCS = sqlprop.get("connectionString") - sqlepid = sqlprop.get("id") - provStatus = sqlprop.get("provisioningStatus") - elif item_type == "Warehouse": - sqlEPCS = prop.get("connectionString") - sqlepid = item_id - provStatus = None + if item_type == "Lakehouse": + sqlprop = prop.get("sqlEndpointProperties") + sqlEPCS = sqlprop.get("connectionString") + sqlepid = sqlprop.get("id") + provStatus = sqlprop.get("provisioningStatus") + elif item_type == "Warehouse": + sqlEPCS = prop.get("connectionString") + sqlepid = item_id + provStatus = None - if provStatus == "InProgress": - raise ValueError( - f"{icons.red_dot} The SQL Endpoint for the '{item_name}' lakehouse within the '{workspace_name}' workspace has not yet been provisioned. Please wait until it has been provisioned." - ) + if provStatus == "InProgress": + raise ValueError( + f"{icons.red_dot} The SQL Endpoint for the '{item_name}' {item_type.lower()} within the '{workspace_name}' workspace has not yet been provisioned. Please wait until it has been provisioned." + ) - start_expr = "let\n\tdatabase = " - end_expr = "\nin\n\tdatabase" - mid_expr = f'Sql.Database("{sqlEPCS}", "{sqlepid}")' + start_expr = "let\n\tdatabase = " + end_expr = "\nin\n\tdatabase" + mid_expr = f'Sql.Database("{sqlEPCS}", "{sqlepid}")' + return f"{start_expr}{mid_expr}{end_expr}" + else: + # Build DL/OL expression + env = _get_fabric_context_setting("spark.trident.pbienv").lower() + env = "" if env == "prod" else f"{env}-" - return f"{start_expr}{mid_expr}{end_expr}" + return f"""let\n\tSource = AzureStorage.DataLake("https://{env}onelake.dfs.fabric.microsoft.com/{workspace_id}/{item_id}")\nin\n\tSource""" diff --git a/src/sempy_labs/directlake/_guardrails.py b/src/sempy_labs/directlake/_guardrails.py index df031434..0aa7d288 100644 --- a/src/sempy_labs/directlake/_guardrails.py +++ b/src/sempy_labs/directlake/_guardrails.py @@ -48,6 +48,7 @@ def get_sku_size(workspace: Optional[str | UUID] = None) -> str: str The SKU size for a workspace. 
""" + from sempy_labs._capacities import list_capacities (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) @@ -59,7 +60,7 @@ def get_sku_size(workspace: Optional[str | UUID] = None) -> str: ) capacity_id = dfW["Capacity Id"].iloc[0] - dfC = fabric.list_capacities() + dfC = list_capacities() dfC_filt = dfC[dfC["Id"] == capacity_id] if len(dfC_filt) == 0: diff --git a/src/sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py b/src/sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py index 119d2aed..fb28a5a0 100644 --- a/src/sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py +++ b/src/sempy_labs/directlake/_update_directlake_model_lakehouse_connection.py @@ -1,16 +1,66 @@ -import sempy.fabric as fabric from sempy_labs.directlake._generate_shared_expression import generate_shared_expression from sempy_labs._helper_functions import ( - resolve_lakehouse_name, resolve_dataset_name_and_id, resolve_workspace_name_and_id, + resolve_item_name_and_id, + resolve_lakehouse_name_and_id, ) +from sempy._utils._log import log from sempy_labs.tom import connect_semantic_model -from typing import Optional +from typing import Optional, List import sempy_labs._icons as icons from uuid import UUID +import re +def _extract_expression_list(expression): + """ + Finds the pattern for DL/SQL & DL/OL expressions in the semantic model. + """ + + pattern_sql = r'Sql\.Database\s*\(\s*"([^"]+)"\s*,\s*"([^"]+)"\s*\)' + pattern_no_sql = ( + r'AzureStorage\.DataLake\(".*?/([0-9a-fA-F\-]{36})/([0-9a-fA-F\-]{36})"' + ) + + match_sql = re.search(pattern_sql, expression) + match_no_sql = re.search(pattern_no_sql, expression) + + result = [] + if match_sql: + value_1, value_2 = match_sql.groups() + result = [value_1, value_2, True] + elif match_no_sql: + value_1, value_2 = match_no_sql.groups() + result = [value_1, value_2, False] + + return result + + +def _get_direct_lake_expressions( + dataset: str | UUID, workspace: Optional[str | UUID] = None +) -> dict: + """ + Extracts a dictionary of all Direct Lake expressions from a semantic model. + """ + + from sempy_labs.tom import connect_semantic_model + + result = {} + + with connect_semantic_model(dataset=dataset, workspace=workspace) as tom: + for e in tom.model.Expressions: + expr_name = e.Name + expr = e.Expression + + list_values = _extract_expression_list(expr) + if list_values: + result[expr_name] = list_values + + return result + + +@log def update_direct_lake_model_lakehouse_connection( dataset: str | UUID, workspace: Optional[str | UUID] = None, @@ -37,54 +87,24 @@ def update_direct_lake_model_lakehouse_connection( or if no lakehouse attached, resolves to the workspace of the notebook. """ - (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) - (dataset_name, dataset_id) = resolve_dataset_name_and_id(dataset, workspace_id) - - if lakehouse_workspace is None: - lakehouse_workspace = workspace_name - - if lakehouse is None: - lakehouse_id = fabric.get_lakehouse_id() - lakehouse = resolve_lakehouse_name(lakehouse_id, lakehouse_workspace) - - # Check if lakehouse is valid - dfI = fabric.list_items(workspace=lakehouse_workspace, type="Lakehouse") - dfI_filt = dfI[(dfI["Display Name"] == lakehouse)] - - if len(dfI_filt) == 0: - raise ValueError( - f"{icons.red_dot} The '{lakehouse}' lakehouse does not exist within the '{lakehouse_workspace}' workspace. " - f"Therefore it cannot be used to support the '{dataset_name}' semantic model within the '{workspace_name}' workspace." 
- ) - - icons.sll_tags.append("UpdateDLConnection") - - shEx = generate_shared_expression( - item_name=lakehouse, item_type="Lakehouse", workspace=lakehouse_workspace - ) - - with connect_semantic_model( - dataset=dataset_id, readonly=False, workspace=workspace_id - ) as tom: - - if not tom.is_direct_lake(): - raise ValueError( - f"{icons.red_dot} The '{dataset_name}' semantic model is not in Direct Lake. This function is only applicable to Direct Lake semantic models." - ) - - tom.model.Expressions["DatabaseQuery"].Expression = shEx - - print( - f"{icons.green_dot} The expression in the '{dataset_name}' semantic model has been updated to point to the '{lakehouse}' lakehouse in the '{lakehouse_workspace}' workspace." + update_direct_lake_model_connection( + dataset=dataset, + workspace=workspace, + source=lakehouse, + source_type="Lakehouse", + source_workspace=lakehouse_workspace, ) +@log def update_direct_lake_model_connection( dataset: str | UUID, workspace: Optional[str | UUID] = None, source: Optional[str] = None, source_type: str = "Lakehouse", source_workspace: Optional[str | UUID] = None, + use_sql_endpoint: bool = True, + tables: Optional[str | List[str]] = None, ): """ Remaps a Direct Lake semantic model's SQL Endpoint connection to a new lakehouse/warehouse. @@ -106,7 +126,21 @@ def update_direct_lake_model_connection( The Fabric workspace name or ID used by the lakehouse/warehouse. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. + use_sql_endpoint : bool, default=True + If True, the SQL Endpoint will be used for the connection. + If False, Direct Lake over OneLake will be used. + tables : str | List[str], default=None + The name(s) of the table(s) to update in the Direct Lake semantic model. + If None, all tables will be updated (if there is only one expression). + If multiple tables are specified, they must be provided as a list. 
""" + if use_sql_endpoint: + icons.sll_tags.append("UpdateDLConnection_SQL") + else: + icons.sll_tags.append("UpdateDLConnection_DLOL") + + if isinstance(tables, str): + tables = [tables] (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) (dataset_name, dataset_id) = resolve_dataset_name_and_id(dataset, workspace_id) @@ -121,23 +155,25 @@ def update_direct_lake_model_connection( if source_workspace is None: source_workspace = workspace_name - if source is None: - source_id = fabric.get_lakehouse_id() - source = resolve_lakehouse_name(source_id, source_workspace) - else: - source_id = fabric.resolve_item_id( - item_name=source, type=source_type, workspace=source_workspace + if source_type == "Lakehouse": + (source_name, source_id) = resolve_lakehouse_name_and_id( + lakehouse=source, workspace=source_workspace ) - source = fabric.resolve_item_name( - item_id=source_id, workspace=source_workspace, type=source_type + else: + (source_name, source_id) = resolve_item_name_and_id( + item=source, type=source_type, workspace=source_workspace ) - icons.sll_tags.append("UpdateDLConnection") - - shEx = generate_shared_expression( - item_name=source, item_type=source_type, workspace=source_workspace + shared_expression = generate_shared_expression( + item_name=source_name, + item_type=source_type, + workspace=source_workspace, + use_sql_endpoint=use_sql_endpoint, ) + expression_dict = _get_direct_lake_expressions(dataset=dataset, workspace=workspace) + expressions = list(expression_dict.keys()) + with connect_semantic_model( dataset=dataset_id, readonly=False, workspace=workspace_id ) as tom: @@ -147,8 +183,55 @@ def update_direct_lake_model_connection( f"{icons.red_dot} The '{dataset_name}' semantic model within the '{workspace_name}' workspace is not in Direct Lake. This function is only applicable to Direct Lake semantic models." ) - tom.model.Expressions["DatabaseQuery"].Expression = shEx + # Update the single connection expression + if len(expressions) > 1 and not tables: + print( + f"{icons.info} Multiple expressions found in the model. Please specify the tables to update using the 'tables parameter." + ) + return + elif len(expressions) == 1 and not tables: + expr = expressions[0] + tom.model.Expressions[expr].Expression = shared_expression - print( - f"{icons.green_dot} The expression in the '{dataset_name}' semantic model within the '{workspace_name}' workspace has been updated to point to the '{source}' {source_type.lower()} in the '{source_workspace}' workspace." - ) + print( + f"{icons.green_dot} The expression in the '{dataset_name}' semantic model within the '{workspace_name}' workspace has been updated to point to the '{source}' {source_type.lower()} in the '{source_workspace}' workspace." 
+ ) + else: + import sempy + + sempy.fabric._client._utils._init_analysis_services() + import Microsoft.AnalysisServices.Tabular as TOM + + expr_list = _extract_expression_list(shared_expression) + + expr_name = next( + (name for name, exp in expression_dict.items() if exp == expr_list), + None, + ) + + # If the expression does not already exist, create it + def generate_unique_name(existing_names): + i = 1 + while True: + candidate = f"DatabaseQuery{i}" + if candidate not in existing_names: + return candidate + i += 1 + + if not expr_name: + expr_name = generate_unique_name(expressions) + tom.add_expression(name=expr_name, expression=shared_expression) + + all_tables = [t.Name for t in tom.model.Tables] + for t_name in tables: + if t_name not in all_tables: + raise ValueError( + f"{icons.red_dot} The table '{t_name}' does not exist in the '{dataset_name}' semantic model within the '{workspace_name}' workspace." + ) + p = next(p for p in tom.model.Tables[t_name].Partitions) + if p.Mode != TOM.ModeType.DirectLake: + raise ValueError( + f"{icons.red_dot} The table '{t_name}' in the '{dataset_name}' semantic model within the '{workspace_name}' workspace is not in Direct Lake mode. This function is only applicable to Direct Lake tables." + ) + + p.Source.ExpressionSource = tom.model.Expressions[expr_name] diff --git a/src/sempy_labs/directlake/_update_directlake_partition_entity.py b/src/sempy_labs/directlake/_update_directlake_partition_entity.py index a878ad8c..46ad304c 100644 --- a/src/sempy_labs/directlake/_update_directlake_partition_entity.py +++ b/src/sempy_labs/directlake/_update_directlake_partition_entity.py @@ -1,5 +1,4 @@ import sempy -import sempy.fabric as fabric from sempy_labs.tom import connect_semantic_model from sempy_labs._refresh_semantic_model import refresh_semantic_model from sempy_labs.directlake._dl_helper import get_direct_lake_source @@ -7,16 +6,20 @@ _convert_data_type, resolve_dataset_name_and_id, resolve_workspace_name_and_id, + resolve_workspace_name, ) +from sempy._utils._log import log from typing import List, Optional, Union import sempy_labs._icons as icons from uuid import UUID +@log def update_direct_lake_partition_entity( dataset: str | UUID, table_name: Union[str, List[str]], entity_name: Union[str, List[str]], + schema: Optional[str] = None, workspace: Optional[str | UUID] = None, ): """ @@ -30,6 +33,9 @@ def update_direct_lake_partition_entity( Name of the table(s) in the semantic model. entity_name : str, List[str] Name of the lakehouse table to be mapped to the semantic model table. + schema : str, default=None + The schema of the lakehouse table to be mapped to the semantic model table. + Defaults to None which resolves to the existing schema of the lakehouse table. workspace : str | uuid.UUID, default=None The Fabric workspace name or ID in which the semantic model exists. 
Defaults to None which resolves to the workspace of the attached lakehouse @@ -79,13 +85,20 @@ def update_direct_lake_partition_entity( tom.model.Tables[tName].Partitions[part_name].Source.EntityName = eName # Update source lineage tag - schema = tom.model.Tables[tName].Partitions[part_name].Source.SchemaName or 'dbo' + existing_schema = ( + tom.model.Tables[tName].Partitions[part_name].Source.SchemaName or "dbo" + ) + if schema is None: + schema = existing_schema + + tom.model.Tables[tName].Partitions[part_name].Source.SchemaName = schema tom.model.Tables[tName].SourceLineageTag = f"[{schema}].[{eName}]" print( f"{icons.green_dot} The '{tName}' table in the '{dataset_name}' semantic model within the '{workspace_name}' workspace has been updated to point to the '{eName}' table." ) +@log def add_table_to_direct_lake_semantic_model( dataset: str | UUID, table_name: str, @@ -134,7 +147,7 @@ def add_table_to_direct_lake_semantic_model( f"{icons.red_dot} This function only supports Direct Lake semantic models where the source lakehouse resides in the same workpace as the semantic model." ) - lakehouse_workspace = fabric.resolve_workspace_name(lakehouse_workspace_id) + lakehouse_workspace = resolve_workspace_name(workspace_id=lakehouse_workspace_id) with connect_semantic_model( dataset=dataset_id, readonly=False, workspace=workspace_id diff --git a/src/sempy_labs/dotnet_lib/dotnet.runtime.config.json b/src/sempy_labs/dotnet_lib/dotnet.runtime.config.json new file mode 100644 index 00000000..76dcf4e9 --- /dev/null +++ b/src/sempy_labs/dotnet_lib/dotnet.runtime.config.json @@ -0,0 +1,10 @@ +{ + "runtimeOptions": { + "tfm": "net6.0", + "framework": { + "name": "Microsoft.NETCore.App", + "version": "6.0.0" + }, + "rollForward": "Major" + } +} \ No newline at end of file diff --git a/src/sempy_labs/graph/_groups.py b/src/sempy_labs/graph/_groups.py index c1cb01ad..24ac9bbc 100644 --- a/src/sempy_labs/graph/_groups.py +++ b/src/sempy_labs/graph/_groups.py @@ -6,6 +6,7 @@ _create_dataframe, _update_dataframe_datatypes, ) +from sempy._utils._log import log import sempy_labs._icons as icons from typing import List, Literal @@ -38,6 +39,7 @@ def resolve_group_id(group: str | UUID) -> UUID: return group_id +@log def list_groups() -> pd.DataFrame: """ Shows a list of groups and their properties. @@ -158,6 +160,7 @@ def _get_group(group_id: UUID) -> pd.DataFrame: return df +@log def list_group_members(group: str | UUID) -> pd.DataFrame: """ Shows a list of the members of a group. @@ -217,6 +220,7 @@ def list_group_members(group: str | UUID) -> pd.DataFrame: return df +@log def list_group_owners(group: str | UUID) -> pd.DataFrame: """ Shows a list of the owners of a group. @@ -332,6 +336,7 @@ def _base_add_to_group( ) +@log def add_group_members( group: str | UUID, user: str | UUID | List[str | UUID], @@ -376,6 +381,7 @@ def add_group_owners( _base_add_to_group(group=group, object=user, object_type="owners") +@log def renew_group(group: str | UUID): """ Renews the group. diff --git a/src/sempy_labs/graph/_teams.py b/src/sempy_labs/graph/_teams.py index e9943bda..31d4745a 100644 --- a/src/sempy_labs/graph/_teams.py +++ b/src/sempy_labs/graph/_teams.py @@ -1,5 +1,6 @@ import pandas as pd from uuid import UUID +from sempy._utils._log import log from sempy_labs._helper_functions import ( _base_api, _create_dataframe, @@ -7,6 +8,7 @@ ) +@log def list_teams() -> pd.DataFrame: """ Shows a list of teams and their properties. 
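A minimal usage sketch of the Direct Lake over OneLake path introduced earlier in this patch (generate_shared_expression with use_sql_endpoint=False, and update_direct_lake_model_connection with the new use_sql_endpoint and tables parameters). All workspace, lakehouse, model, and table names below are placeholders, and the imports assume both functions remain exported from sempy_labs.directlake as in earlier releases:

    from sempy_labs.directlake import (
        generate_shared_expression,
        update_direct_lake_model_connection,
    )

    # Build a Direct Lake over OneLake (no SQL Endpoint) expression for a lakehouse.
    expression = generate_shared_expression(
        item_name="Gold Lakehouse",
        item_type="Lakehouse",
        workspace="Sales Workspace",
        use_sql_endpoint=False,
    )

    # Repoint only the 'Sales' table of a Direct Lake model at the lakehouse over OneLake;
    # with multiple expressions in the model, the 'tables' parameter selects which partitions move.
    update_direct_lake_model_connection(
        dataset="Sales Model",
        workspace="Sales Workspace",
        source="Gold Lakehouse",
        source_type="Lakehouse",
        source_workspace="Sales Workspace",
        use_sql_endpoint=False,
        tables=["Sales"],
    )
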
diff --git a/src/sempy_labs/graph/_users.py b/src/sempy_labs/graph/_users.py index e6d0832c..beec8886 100644 --- a/src/sempy_labs/graph/_users.py +++ b/src/sempy_labs/graph/_users.py @@ -7,6 +7,7 @@ _base_api, _create_dataframe, ) +from sempy._utils._log import log def resolve_user_id(user: str | UUID) -> UUID: @@ -33,6 +34,7 @@ def resolve_user_id(user: str | UUID) -> UUID: return result.get("id") +@log def get_user(user: str | UUID) -> pd.DataFrame: """ Shows properties of a given user. @@ -70,6 +72,7 @@ def get_user(user: str | UUID) -> pd.DataFrame: return pd.DataFrame([new_data]) +@log def list_users() -> pd.DataFrame: """ Shows a list of users and their properties. @@ -120,6 +123,7 @@ def list_users() -> pd.DataFrame: return df +@log def send_mail( user: UUID | str, subject: str, diff --git a/src/sempy_labs/lakehouse/__init__.py b/src/sempy_labs/lakehouse/__init__.py index 0c446ea3..4cdb720a 100644 --- a/src/sempy_labs/lakehouse/__init__.py +++ b/src/sempy_labs/lakehouse/__init__.py @@ -1,17 +1,34 @@ -from sempy_labs.lakehouse._get_lakehouse_columns import get_lakehouse_columns -from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables +from sempy_labs.lakehouse._get_lakehouse_columns import ( + get_lakehouse_columns, +) +from sempy_labs.lakehouse._get_lakehouse_tables import ( + get_lakehouse_tables, +) from sempy_labs.lakehouse._lakehouse import ( lakehouse_attached, optimize_lakehouse_tables, vacuum_lakehouse_tables, run_table_maintenance, ) - from sempy_labs.lakehouse._shortcuts import ( # create_shortcut, create_shortcut_onelake, delete_shortcut, reset_shortcut_cache, + list_shortcuts, +) +from sempy_labs.lakehouse._blobs import ( + recover_lakehouse_object, + list_blobs, +) +from sempy_labs.lakehouse._livy_sessions import ( + list_livy_sessions, +) +from sempy_labs.lakehouse._helper import ( + is_v_ordered, + delete_lakehouse, + update_lakehouse, + load_table, ) __all__ = [ @@ -25,4 +42,12 @@ "vacuum_lakehouse_tables", "reset_shortcut_cache", "run_table_maintenance", + "list_shortcuts", + "recover_lakehouse_object", + "list_blobs", + "list_livy_sessions", + "is_v_ordered", + "delete_lakehouse", + "update_lakehouse", + "load_table", ] diff --git a/src/sempy_labs/lakehouse/_blobs.py b/src/sempy_labs/lakehouse/_blobs.py new file mode 100644 index 00000000..2aaf5ce2 --- /dev/null +++ b/src/sempy_labs/lakehouse/_blobs.py @@ -0,0 +1,246 @@ +from sempy_labs._helper_functions import ( + resolve_workspace_id, + resolve_lakehouse_id, + _xml_to_dict, + _create_dataframe, + _update_dataframe_datatypes, +) +from sempy._utils._log import log +from uuid import UUID +from typing import Optional, List +import sempy_labs._icons as icons +import xml.etree.ElementTree as ET +import pandas as pd +from sempy.fabric.exceptions import FabricHTTPException + + +def _request_blob_api( + request: str, + method: str = "get", + payload: Optional[dict] = None, + status_codes: int | List[int] = 200, + uses_pagination: bool = False, +): + + import requests + import notebookutils + from sempy.fabric.exceptions import FabricHTTPException + + if isinstance(status_codes, int): + status_codes = [status_codes] + + token = notebookutils.credentials.getToken("storage") + + headers = { + "Authorization": f"Bearer {token}", + "Content-Type": "application/xml", + "x-ms-version": "2025-05-05", + } + + base_url = "https://onelake.blob.fabric.microsoft.com/" + full_url = f"{base_url}{request}" + results = [] + + while True: + response = requests.request( + method.upper(), + full_url, + headers=headers, + 
data=payload if method.lower() != "get" else None, + ) + + if response.status_code not in status_codes: + raise FabricHTTPException(response) + + if not uses_pagination: + return response + + # Parse XML to find blobs and NextMarker + root = ET.fromstring(response.content) + results.append(root) + + next_marker = root.findtext(".//NextMarker") + if not next_marker: + break # No more pages + + # Append the marker to the original request (assuming query string format) + delimiter = "&" if "?" in request else "?" + full_url = f"{base_url}{request}{delimiter}marker={next_marker}" + + return results + + +@log +def list_blobs( + lakehouse: Optional[str | UUID] = None, + workspace: Optional[str | UUID] = None, + container: Optional[str] = None, +) -> pd.DataFrame: + """ + Returns a list of blobs for a given lakehouse. + + This function leverages the following API: `List Blobs `_. + + Parameters + ---------- + lakehouse : str | uuid.UUID, default=None + The Fabric lakehouse name or ID. + Defaults to None which resolves to the lakehouse attached to the notebook. + workspace : str | uuid.UUID, default=None + The Fabric workspace name or ID used by the lakehouse. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + container : str, default=None + The container name to list blobs from. If None, lists all blobs in the lakehouse. + Valid values are "Tables" or "Files". If not specified, the function will list all blobs in the lakehouse. + + Returns + ------- + pandas.DataFrame + A pandas dataframe showing a list of blobs in the lakehouse. + """ + + workspace_id = resolve_workspace_id(workspace) + lakehouse_id = resolve_lakehouse_id(lakehouse, workspace_id) + + if container is None: + path_prefix = f"{workspace_id}/{lakehouse_id}" + else: + if container not in ["Tables", "Files"]: + raise ValueError( + f"{icons.red_dot} Invalid container '{container}' within the file_path parameter. Expected 'Tables' or 'Files'." 
+ ) + path_prefix = f"{workspace_id}/{lakehouse_id}/{container}" + + columns = { + "Blob Name": "str", + "Is Deleted": "bool", + "Deletion Id": "str", + "Creation Time": "datetime", + "Expiry Time": "datetime", + "Etag": "str", + "Resource Type": "str", + "Content Length": "int", + "Content Type": "str", + "Content Encoding": "str", + "Content Language": "str", + "Content CRC64": "str", + "Content MD5": "str", + "Cache Control": "str", + "Content Disposition": "str", + "Blob Type": "str", + "Access Tier": "str", + "Access Tier Inferred": "str", + "Server Encrypted": "bool", + "Deleted Time": "str", + "Remaining Retention Days": "str", + } + + df = _create_dataframe(columns=columns) + + url = f"{path_prefix}?restype=container&comp=list&include=deleted" + + responses = _request_blob_api( + request=url, + uses_pagination=True, + ) + + dfs = [] + for root in responses: + response_json = _xml_to_dict(root) + + blobs = ( + response_json.get("EnumerationResults", {}).get("Blobs", {}).get("Blob", []) + ) + + if isinstance(blobs, dict): + blobs = [blobs] + + for blob in blobs: + p = blob.get("Properties", {}) + new_data = { + "Blob Name": blob.get("Name"), + "Is Deleted": blob.get("Deleted", False), + "Deletion Id": blob.get("DeletionId"), + "Creation Time": p.get("Creation-Time"), + "Expiry Time": p.get("Expiry-Time"), + "Etag": p.get("Etag"), + "Resource Type": p.get("ResourceType"), + "Content Length": p.get("Content-Length"), + "Content Type": p.get("Content-Type"), + "Content Encoding": p.get("Content-Encoding"), + "Content Language": p.get("Content-Language"), + "Content CRC64": p.get("Content-CRC64"), + "Content MD5": p.get("Content-MD5"), + "Cache Control": p.get("Cache-Control"), + "Content Disposition": p.get("Content-Disposition"), + "Blob Type": p.get("BlobType"), + "Access Tier": p.get("AccessTier"), + "Access Tier Inferred": p.get("AccessTierInferred"), + "Server Encrypted": p.get("ServerEncrypted"), + "Deleted Time": p.get("DeletedTime"), + "Remaining Retention Days": p.get("RemainingRetentionDays"), + } + + dfs.append(pd.DataFrame(new_data, index=[0])) + + if dfs: + df = pd.concat(dfs, ignore_index=True) + _update_dataframe_datatypes(dataframe=df, column_map=columns) + + return df + + +@log +def recover_lakehouse_object( + file_path: str, + lakehouse: Optional[str | UUID] = None, + workspace: Optional[str | UUID] = None, +): + """ + Recovers an object (i.e. table, file, folder) in a lakehouse from a deleted state. Only `soft-deleted objects `_ can be recovered (deleted for less than 7 days). + + Parameters + ---------- + file_path : str + The file path of the object to restore. For example: "Tables/my_delta_table". + lakehouse : str | uuid.UUID, default=None + The Fabric lakehouse name or ID. + Defaults to None which resolves to the lakehouse attached to the notebook. + workspace : str | uuid.UUID, default=None + The Fabric workspace name or ID used by the lakehouse. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + """ + + workspace_id = resolve_workspace_id(workspace) + lakehouse_id = resolve_lakehouse_id(lakehouse, workspace_id) + + blob_name = f"{lakehouse_id}/{file_path}" + + container = file_path.split("/")[0] + if container not in ["Tables", "Files"]: + raise ValueError( + f"{icons.red_dot} Invalid container '{container}' within the file_path parameter. Expected 'Tables' or 'Files'." 
+ ) + + # Undelete the blob + print(f"{icons.in_progress} Attempting to recover the '{blob_name}' blob...") + + try: + _request_blob_api( + request=f"{workspace_id}/{lakehouse_id}/{file_path}?comp=undelete", + method="put", + ) + print( + f"{icons.green_dot} The '{blob_name}' blob recover attempt was successful." + ) + except FabricHTTPException as e: + if e.status_code == 404: + print( + f"{icons.warning} The '{blob_name}' blob was not found. No action taken." + ) + else: + print( + f"{icons.red_dot} An error occurred while recovering the '{blob_name}' blob: {e}" + ) diff --git a/src/sempy_labs/lakehouse/_get_lakehouse_columns.py b/src/sempy_labs/lakehouse/_get_lakehouse_columns.py index 56f3bdb4..ad2d2a18 100644 --- a/src/sempy_labs/lakehouse/_get_lakehouse_columns.py +++ b/src/sempy_labs/lakehouse/_get_lakehouse_columns.py @@ -1,14 +1,17 @@ import pandas as pd +import re from sempy_labs._helper_functions import ( format_dax_object_name, resolve_workspace_name_and_id, resolve_lakehouse_name_and_id, _create_dataframe, - _create_spark_session, + _get_delta_table, + _pure_python_notebook, ) from typing import Optional from sempy._utils._log import log from uuid import UUID +import sempy_labs._icons as icons @log @@ -16,7 +19,9 @@ def get_lakehouse_columns( lakehouse: Optional[str | UUID] = None, workspace: Optional[str | UUID] = None ) -> pd.DataFrame: """ - Shows the tables and columns of a lakehouse and their respective properties. + Shows the tables and columns of a lakehouse and their respective properties. This function can be executed in either a PySpark or pure Python notebook. Note that data types may show differently when using PySpark vs pure Python. + + Service Principal Authentication is supported (see `here `_ for examples). Parameters ---------- @@ -34,7 +39,6 @@ def get_lakehouse_columns( Shows the tables/columns within a lakehouse and their properties. """ from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables - from delta import DeltaTable columns = { "Workspace Name": "string", @@ -51,29 +55,48 @@ def get_lakehouse_columns( lakehouse=lakehouse, workspace=workspace_id ) - spark = _create_spark_session() - tables = get_lakehouse_tables( lakehouse=lakehouse_id, workspace=workspace_id, extended=False, count_rows=False ) tables_filt = tables[tables["Format"] == "delta"] - for _, r in tables_filt.iterrows(): - table_name = r["Table Name"] - path = r["Location"] - delta_table = DeltaTable.forPath(spark, path) - sparkdf = delta_table.toDF() - - for col_name, data_type in sparkdf.dtypes: - full_column_name = format_dax_object_name(table_name, col_name) - new_data = { + def add_column_metadata(table_name, col_name, data_type): + new_rows.append( + { "Workspace Name": workspace_name, - "Lakehouse Name": lakehouse, + "Lakehouse Name": lakehouse_name, "Table Name": table_name, "Column Name": col_name, - "Full Column Name": full_column_name, + "Full Column Name": format_dax_object_name(table_name, col_name), "Data Type": data_type, } - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) + ) + + new_rows = [] + + for _, r in tables_filt.iterrows(): + table_name = r["Table Name"] + path = r["Location"] + + if _pure_python_notebook(): + from deltalake import DeltaTable + + table_schema = DeltaTable(path).schema() + + for field in table_schema.fields: + col_name = field.name + match = re.search(r'"(.*?)"', str(field.type)) + if not match: + raise ValueError( + f"{icons.red_dot} Could not find data type for column {col_name}." 
+ ) + data_type = match.group(1) + add_column_metadata(table_name, col_name, data_type) + else: + delta_table = _get_delta_table(path=path) + table_df = delta_table.toDF() + + for col_name, data_type in table_df.dtypes: + add_column_metadata(table_name, col_name, data_type) - return df + return pd.concat([df, pd.DataFrame(new_rows)], ignore_index=True) diff --git a/src/sempy_labs/lakehouse/_get_lakehouse_tables.py b/src/sempy_labs/lakehouse/_get_lakehouse_tables.py index 73ac8d9b..01f3ab7b 100644 --- a/src/sempy_labs/lakehouse/_get_lakehouse_tables.py +++ b/src/sempy_labs/lakehouse/_get_lakehouse_tables.py @@ -1,7 +1,7 @@ -import sempy.fabric as fabric +import os import pandas as pd import pyarrow.parquet as pq -import datetime +from datetime import datetime from sempy_labs._helper_functions import ( _get_column_aggregate, resolve_workspace_name_and_id, @@ -9,7 +9,11 @@ save_as_delta_table, _base_api, _create_dataframe, - _create_spark_session, + _read_delta_table, + _get_delta_table, + _mount, + create_abfss_path, + _pure_python_notebook, ) from sempy_labs.directlake._guardrails import ( get_sku_size, @@ -33,8 +37,12 @@ def get_lakehouse_tables( """ Shows the tables of a lakehouse and their respective properties. Option to include additional properties relevant to Direct Lake guardrails. + This function can be executed in either a PySpark or pure Python notebook. + This is a wrapper function for the following API: `Tables - List Tables `_ plus extended capabilities. + Service Principal Authentication is supported (see `here `_ for examples). + Parameters ---------- lakehouse : str | uuid.UUID, default=None @@ -75,19 +83,10 @@ def get_lakehouse_tables( if count_rows: # Setting countrows defaults to extended=True extended = True - if ( - workspace_id != fabric.get_workspace_id() - and lakehouse_id != fabric.get_lakehouse_id() - and count_rows - ): - raise ValueError( - f"{icons.red_dot} If 'count_rows' is set to True, you must run this function against the default lakehouse attached to the notebook. " - "Count rows runs a spark query and cross-workspace spark queries are currently not supported." 
- ) - responses = _base_api( request=f"v1/workspaces/{workspace_id}/lakehouses/{lakehouse_id}/tables", uses_pagination=True, + client="fabric_sp", ) if not responses[0].get("data"): @@ -112,40 +111,59 @@ def get_lakehouse_tables( if extended: sku_value = get_sku_size(workspace_id) guardrail = get_directlake_guardrails_for_sku(sku_value) - spark = _create_spark_session() - df["Files"] = None - df["Row Groups"] = None - df["Table Size"] = None + local_path = _mount(lakehouse=lakehouse_id, workspace=workspace_id) + + df["Files"], df["Row Groups"], df["Table Size"] = None, None, None if count_rows: df["Row Count"] = None + for i, r in df.iterrows(): - tName = r["Table Name"] + table_name = r["Table Name"] if r["Type"] == "Managed" and r["Format"] == "delta": - detail_df = spark.sql(f"DESCRIBE DETAIL `{tName}`").collect()[0] - num_files = detail_df.numFiles - size_in_bytes = detail_df.sizeInBytes - - delta_table_path = f"Tables/{tName}" - latest_files = ( - spark.read.format("delta").load(delta_table_path).inputFiles() + delta_table_path = create_abfss_path( + lakehouse_id, workspace_id, table_name ) - file_paths = [f.split("/")[-1] for f in latest_files] - # Handle FileNotFoundError + if _pure_python_notebook(): + from deltalake import DeltaTable + + delta_table = DeltaTable(delta_table_path) + latest_files = [ + file["path"] + for file in delta_table.get_add_actions().to_pylist() + ] + size_in_bytes = 0 + for f in latest_files: + local_file_path = os.path.join( + local_path, "Tables", table_name, os.path.basename(f) + ) + if os.path.exists(local_file_path): + size_in_bytes += os.path.getsize(local_file_path) + num_latest_files = len(latest_files) + else: + delta_table = _get_delta_table(delta_table_path) + latest_files = _read_delta_table(delta_table_path).inputFiles() + table_df = delta_table.toDF() + table_details = delta_table.detail().collect()[0].asDict() + num_latest_files = table_details.get("numFiles", 0) + size_in_bytes = table_details.get("sizeInBytes", 0) + + table_path = os.path.join(local_path, "Tables", table_name) + file_paths = [os.path.basename(f) for f in latest_files] + num_rowgroups = 0 for filename in file_paths: - try: - num_rowgroups += pq.ParquetFile( - f"/lakehouse/default/{delta_table_path}/{filename}" - ).num_row_groups - except FileNotFoundError: - continue - df.at[i, "Files"] = num_files + parquet_file = pq.ParquetFile(f"{table_path}/{filename}") + num_rowgroups += parquet_file.num_row_groups + df.at[i, "Files"] = num_latest_files df.at[i, "Row Groups"] = num_rowgroups df.at[i, "Table Size"] = size_in_bytes if count_rows: - num_rows = spark.table(tName).count() - df.at[i, "Row Count"] = num_rows + if _pure_python_notebook(): + row_count = delta_table.to_pyarrow_table().num_rows + else: + row_count = table_df.count() + df.at[i, "Row Count"] = row_count if extended: intColumns = ["Files", "Row Groups", "Table Size"] @@ -168,19 +186,16 @@ def get_lakehouse_tables( if export: if not lakehouse_attached(): raise ValueError( - f"{icons.red_dot} In order to save the report.json file, a lakehouse must be attached to the notebook. Please attach a lakehouse to this notebook." + f"{icons.red_dot} In order to save the dataframe, a lakehouse must be attached to the notebook. Please attach a lakehouse to this notebook." 
) - (current_lakehouse_name, current_lakehouse_id) = resolve_lakehouse_name_and_id() - lakeTName = "lakehouse_table_details" - lakeT_filt = df[df["Table Name"] == lakeTName] + lake_table_name = "lakehouse_table_details" + df_filt = df[df["Table Name"] == lake_table_name] - if len(lakeT_filt) == 0: + if df_filt.empty: run_id = 1 else: - max_run_id = _get_column_aggregate( - lakehouse=current_lakehouse_name, table_name=lakeTName - ) + max_run_id = _get_column_aggregate(table_name=lake_table_name) run_id = max_run_id + 1 export_df = df.copy() @@ -224,13 +239,13 @@ def get_lakehouse_tables( export_df[c] = export_df[c].astype(bool) print( - f"{icons.in_progress} Saving Lakehouse table properties to the '{lakeTName}' table in the lakehouse...\n" + f"{icons.in_progress} Saving Lakehouse table properties to the '{lake_table_name}' table in the lakehouse...\n" ) - export_df["Timestamp"] = datetime.datetime.now() + export_df["Timestamp"] = datetime.now() export_df["RunId"] = run_id save_as_delta_table( - dataframe=export_df, delta_table_name=lakeTName, write_mode="append" + dataframe=export_df, delta_table_name=lake_table_name, write_mode="append" ) return df diff --git a/src/sempy_labs/lakehouse/_helper.py b/src/sempy_labs/lakehouse/_helper.py new file mode 100644 index 00000000..e340c4d4 --- /dev/null +++ b/src/sempy_labs/lakehouse/_helper.py @@ -0,0 +1,211 @@ +from uuid import UUID +from typing import Optional, Literal +import pyarrow.dataset as ds +from sempy_labs._helper_functions import ( + _mount, + delete_item, + _base_api, + resolve_workspace_name_and_id, + resolve_lakehouse_name_and_id, +) +from sempy._utils._log import log +import sempy_labs._icons as icons +import os + + +@log +def is_v_ordered( + table_name: str, + lakehouse: Optional[str | UUID] = None, + workspace: Optional[str | UUID] = None, + schema: Optional[str] = None, +) -> bool: + """ + Checks if a delta table is v-ordered. + + Parameters + ---------- + table_name : str + The name of the table to check. + lakehouse : str | uuid.UUID, default=None + The Fabric lakehouse name or ID. + Defaults to None which resolves to the lakehouse attached to the notebook. + workspace : str | uuid.UUID, default=None + The Fabric workspace name or ID used by the lakehouse. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + schema : str, optional + The schema of the table to check. If not provided, the default schema is used. + + Returns + ------- + bool + True if the table is v-ordered, False otherwise. + """ + + local_path = _mount(lakehouse=lakehouse, workspace=workspace) + table_path = ( + f"{local_path}/Tables/{schema}/{table_name}" + if schema + else f"{local_path}/Tables/{table_name}" + ) + ds_schema = ds.dataset(table_path).schema.metadata + + return any(b"vorder" in key for key in ds_schema.keys()) + + +def delete_lakehouse( + lakehouse: str | UUID, workspace: Optional[str | UUID] = None +) -> None: + """ + Deletes a lakehouse. + + This is a wrapper function for the following API: `Items - Delete Lakehouse `_. + + Service Principal Authentication is supported (see `here `_ for examples). + + Parameters + ---------- + lakehouse : str | uuid.UUID + The name or ID of the lakehouse to delete. + workspace : str | uuid.UUID, default=None + The Fabric workspace name or ID used by the lakehouse. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. 
+ """ + + delete_item(item=lakehouse, item_type="lakehouse", workspace=workspace) + + +def update_lakehouse( + name: Optional[str] = None, + description: Optional[str] = None, + lakehouse: Optional[str | UUID] = None, + workspace: Optional[str | UUID] = None, +): + """ + Updates a lakehouse. + + This is a wrapper function for the following API: `Items - Update Lakehouse `_. + + Service Principal Authentication is supported (see `here `_ for examples). + + Parameters + ---------- + name: str, default=None + The new name of the lakehouse. + Defaults to None which does not update the name. + description: str, default=None + The new description of the lakehouse. + Defaults to None which does not update the description. + lakehouse : str | uuid.UUID, default=None + The name or ID of the lakehouse to update. + Defaults to None which resolves to the lakehouse attached to the notebook. + workspace : str | uuid.UUID, default=None + The Fabric workspace name or ID used by the lakehouse. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + """ + + if not name and not description: + raise ValueError( + f"{icons.red_dot} Either name or description must be provided." + ) + + (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) + (lakehouse_name, lakehouse_id) = resolve_lakehouse_name_and_id( + lakehouse, workspace_id + ) + + payload = {} + if name: + payload["displayName"] = name + if description: + payload["description"] = description + + _base_api( + request=f"/v1/workspaces/{workspace_id}/lakehouses/{lakehouse_id}", + method="patch", + client="fabric_sp", + payload=payload, + ) + + print( + f"{icons.green_dot} The '{lakehouse_name}' lakehouse within the '{workspace_name}' workspace has been updated accordingly." + ) + + +@log +def load_table( + table_name: str, + file_path: str, + mode: Literal["Overwrite", "Append"], + lakehouse: Optional[str | UUID] = None, + workspace: Optional[str | UUID] = None, +): + """ + Loads a table into a lakehouse. Currently only files are supported, not folders. + + This is a wrapper function for the following API: `Tables - Load Table `_. + + Service Principal Authentication is supported (see `here `_ for examples). + + Parameters + ---------- + table_name : str + The name of the table to load. + file_path : str + The path to the data to load. + mode : Literal["Overwrite", "Append"] + The mode to use when loading the data. + "Overwrite" will overwrite the existing data. + "Append" will append the data to the existing data. + lakehouse : str | uuid.UUID, default=None + The name or ID of the lakehouse to load the table into. + Defaults to None which resolves to the lakehouse attached to the notebook. + workspace : str | uuid.UUID, default=None + The Fabric workspace name or ID used by the lakehouse. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. 
+    """
+
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+    (lakehouse_name, lakehouse_id) = resolve_lakehouse_name_and_id(
+        lakehouse, workspace_id
+    )
+
+    file_extension = os.path.splitext(file_path)[1]
+
+    payload = {
+        "relativePath": file_path,
+        "pathType": "File",
+        "mode": mode,
+        "formatOptions": {},
+    }
+
+    if file_extension == ".csv":
+        payload["formatOptions"] = {"format": "Csv", "header": True, "delimiter": ","}
+    elif file_extension == ".parquet":
+        payload["formatOptions"] = {
+            "format": "Parquet",
+            "header": True,
+        }
+    # Solve for loading folders
+    # elif file_extension == '':
+    #    payload['pathType'] = "Folder"
+    #    payload["recursive"] = recursive
+    #    payload['formatOptions']
+    else:
+        raise NotImplementedError()
+
+    _base_api(
+        request=f"/v1/workspaces/{workspace_id}/lakehouses/{lakehouse_id}/tables/{table_name}/load",
+        client="fabric_sp",
+        method="post",
+        payload=payload,
+        status_codes=202,
+        lro_return_status_code=True,
+    )
+
+    print(
+        f"{icons.green_dot} The '{table_name}' table has been loaded into the '{lakehouse_name}' lakehouse within the '{workspace_name}' workspace."
+    )
diff --git a/src/sempy_labs/lakehouse/_lakehouse.py b/src/sempy_labs/lakehouse/_lakehouse.py
index 9b385843..68be4d2d 100644
--- a/src/sempy_labs/lakehouse/_lakehouse.py
+++ b/src/sempy_labs/lakehouse/_lakehouse.py
@@ -7,6 +7,7 @@
     resolve_lakehouse_name_and_id,
     resolve_workspace_name_and_id,
     _create_spark_session,
+    _pure_python_notebook,
 )
 import sempy_labs._icons as icons
 import re
@@ -32,10 +33,37 @@ def lakehouse_attached() -> bool:
     return False
 
 
+def _optimize_table(path):
+
+    if _pure_python_notebook():
+        from deltalake import DeltaTable
+
+        DeltaTable(path).optimize.compact()
+    else:
+        from delta import DeltaTable
+
+        spark = _create_spark_session()
+        DeltaTable.forPath(spark, path).optimize().executeCompaction()
+
+
+def _vacuum_table(path, retain_n_hours):
+
+    if _pure_python_notebook():
+        from deltalake import DeltaTable
+
+        DeltaTable(path).vacuum(retention_hours=retain_n_hours)
+    else:
+        from delta import DeltaTable
+
+        spark = _create_spark_session()
+        spark.conf.set("spark.databricks.delta.vacuum.parallelDelete.enabled", "true")
+        DeltaTable.forPath(spark, path).vacuum(retain_n_hours)
+
+
 @log
 def optimize_lakehouse_tables(
     tables: Optional[Union[str, List[str]]] = None,
-    lakehouse: Optional[str] = None,
+    lakehouse: Optional[str | UUID] = None,
     workspace: Optional[str | UUID] = None,
 ):
     """
@@ -46,8 +74,8 @@ def optimize_lakehouse_tables(
     tables : str | List[str], default=None
         The table(s) to optimize.
         Defaults to None which resovles to optimizing all tables within the lakehouse.
-    lakehouse : str, default=None
-        The Fabric lakehouse.
+    lakehouse : str | uuid.UUID, default=None
+        The Fabric lakehouse name or ID.
         Defaults to None which resolves to the lakehouse attached to the notebook.
     workspace : str | uuid.UUID, default=None
         The Fabric workspace name or ID used by the lakehouse.
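A minimal usage sketch of the reworked maintenance helpers above; _optimize_table and _vacuum_table pick the engine internally (deltalake in pure Python notebooks, Spark otherwise), so the calls look the same either way. The lakehouse and table names are placeholders:

    from sempy_labs.lakehouse import optimize_lakehouse_tables, vacuum_lakehouse_tables

    # Compact two specific delta tables in a named lakehouse.
    optimize_lakehouse_tables(tables=["DimDate", "FactSales"], lakehouse="Gold Lakehouse")

    # Vacuum every delta table in the attached lakehouse, keeping 7 days (168 hours) of history.
    vacuum_lakehouse_tables(retain_n_hours=168)
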
@@ -56,33 +84,26 @@ def optimize_lakehouse_tables( """ from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables - from delta import DeltaTable - lakeTables = get_lakehouse_tables(lakehouse=lakehouse, workspace=workspace) - lakeTablesDelta = lakeTables[lakeTables["Format"] == "delta"] + df = get_lakehouse_tables(lakehouse=lakehouse, workspace=workspace) + df_delta = df[df["Format"] == "delta"] if isinstance(tables, str): tables = [tables] - if tables is not None: - tables_filt = lakeTablesDelta[lakeTablesDelta["Table Name"].isin(tables)] - else: - tables_filt = lakeTablesDelta.copy() - - spark = _create_spark_session() + df_tables = df_delta[df_delta["Table Name"].isin(tables)] if tables else df_delta - for _, r in (bar := tqdm(tables_filt.iterrows())): - tableName = r["Table Name"] - tablePath = r["Location"] - bar.set_description(f"Optimizing the '{tableName}' table...") - deltaTable = DeltaTable.forPath(spark, tablePath) - deltaTable.optimize().executeCompaction() + for _, r in (bar := tqdm(df_tables.iterrows())): + table_name = r["Table Name"] + path = r["Location"] + bar.set_description(f"Optimizing the '{table_name}' table...") + _optimize_table(path=path) @log def vacuum_lakehouse_tables( tables: Optional[Union[str, List[str]]] = None, - lakehouse: Optional[str] = None, + lakehouse: Optional[str | UUID] = None, workspace: Optional[str | UUID] = None, retain_n_hours: Optional[int] = None, ): @@ -92,9 +113,9 @@ def vacuum_lakehouse_tables( Parameters ---------- tables : str | List[str] | None - The table(s) to vacuum. If no tables are specified, all tables in the lakehouse will be optimized. - lakehouse : str, default=None - The Fabric lakehouse. + The table(s) to vacuum. If no tables are specified, all tables in the lakehouse will be vacuumed. + lakehouse : str | uuid.UUID, default=None + The Fabric lakehouse name or ID. Defaults to None which resolves to the lakehouse attached to the notebook. workspace : str | uuid.UUID, default=None The Fabric workspace name or ID used by the lakehouse. @@ -107,34 +128,21 @@ def vacuum_lakehouse_tables( The default retention period is 168 hours (7 days) unless manually configured via table properties. 
""" - from pyspark.sql import SparkSession from sempy_labs.lakehouse._get_lakehouse_tables import get_lakehouse_tables - from delta import DeltaTable - lakeTables = get_lakehouse_tables(lakehouse=lakehouse, workspace=workspace) - lakeTablesDelta = lakeTables[lakeTables["Format"] == "delta"] + df = get_lakehouse_tables(lakehouse=lakehouse, workspace=workspace) + df_delta = df[df["Format"] == "delta"] if isinstance(tables, str): tables = [tables] - if tables is not None: - tables_filt = lakeTablesDelta[lakeTablesDelta["Table Name"].isin(tables)] - else: - tables_filt = lakeTablesDelta.copy() - - spark = _create_spark_session() - spark.conf.set("spark.databricks.delta.vacuum.parallelDelete.enabled", "true") - - for _, r in (bar := tqdm(tables_filt.iterrows())): - tableName = r["Table Name"] - tablePath = r["Location"] - bar.set_description(f"Vacuuming the '{tableName}' table...") - deltaTable = DeltaTable.forPath(spark, tablePath) + df_tables = df_delta[df_delta["Table Name"].isin(tables)] if tables else df_delta - if retain_n_hours is None: - deltaTable.vacuum() - else: - deltaTable.vacuum(retain_n_hours) + for _, r in (bar := tqdm(df_tables.iterrows())): + table_name = r["Table Name"] + path = r["Location"] + bar.set_description(f"Vacuuming the '{table_name}' table...") + _vacuum_table(path=path, retain_n_hours=retain_n_hours) def run_table_maintenance( diff --git a/src/sempy_labs/lakehouse/_livy_sessions.py b/src/sempy_labs/lakehouse/_livy_sessions.py new file mode 100644 index 00000000..db01f575 --- /dev/null +++ b/src/sempy_labs/lakehouse/_livy_sessions.py @@ -0,0 +1,137 @@ +from sempy_labs._helper_functions import ( + resolve_workspace_id, + resolve_lakehouse_id, + _base_api, + _create_dataframe, + _update_dataframe_datatypes, +) +import pandas as pd +from typing import Optional +from uuid import UUID + + +def list_livy_sessions( + lakehouse: Optional[str | UUID] = None, workspace: Optional[str | UUID] = None +) -> pd.DataFrame: + """ + Shows a list of livy sessions from the specified item identifier. + + This is a wrapper function for the following API: `Livy Sessions - List Livy Sessions `_. + + Service Principal Authentication is supported (see `here `_ for examples). + + Parameters + ---------- + lakehouse : str | uuid.UUID, default=None + The Fabric lakehouse name or ID. + Defaults to None which resolves to the lakehouse attached to the notebook. + workspace : str | uuid.UUID, default=None + The Fabric workspace name or ID. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + + Returns + ------- + pandas.DataFrame + A pandas dataframe showing a list of livy sessions from the specified item identifier. 
+ """ + + columns = { + "Spark Application Id": "string", + "State:": "string", + "Livy Id": "string", + "Origin": "string", + "Attempt Number": "int", + "Max Number Of Attempts": "int", + "Livy Name": "string", + "Submitter Id": "string", + "Submitter Type": "string", + "Item Workspace Id": "string", + "Item Id": "string", + "Item Reference Type": "string", + "Item Name": "string", + "Item Type": "string", + "Job Type": "string", + "Submitted Date Time": "str", + "Start Date Time": "str", + "End Date Time": "string", + "Queued Duration Value": "int", + "Queued Duration Time Unit": "string", + "Running Duration Value": "int", + "Running Duration Time Unit": "string", + "Total Duration Value": "int", + "Total Duration Time Unit": "string", + "Job Instance Id": "string", + "Creator Item Workspace Id": "string", + "Creator Item Id": "string", + "Creator Item Reference Type": "string", + "Creator Item Name": "string", + "Creator Item Type": "string", + "Cancellation Reason": "string", + "Capacity Id": "string", + "Operation Name": "string", + "Runtime Version": "string", + "Livy Session Item Resource Uri": "string", + } + df = _create_dataframe(columns=columns) + + workspace_id = resolve_workspace_id(workspace) + lakehouse_id = resolve_lakehouse_id(lakehouse, workspace_id) + + responses = _base_api( + request=f"/v1/workspaces/{workspace_id}/lakehouses/{lakehouse_id}/livySessions", + uses_pagination=True, + client="fabric_sp", + ) + + dfs = [] + + for r in responses: + for v in r.get("value", []): + queued_duration = v.get("queuedDuration", {}) + running_duration = v.get("runningDuration", {}) + total_duration = v.get("totalDuration", {}) + new_data = { + "Spark Application Id": v.get("sparkApplicationId"), + "State:": v.get("state"), + "Livy Id": v.get("livyId"), + "Origin": v.get("origin"), + "Attempt Number": v.get("attemptNumber"), + "Max Number Of Attempts": v.get("maxNumberOfAttempts"), + "Livy Name": v.get("livyName"), + "Submitter Id": v["submitter"].get("id"), + "Submitter Type": v["submitter"].get("type"), + "Item Workspace Id": v["item"].get("workspaceId"), + "Item Id": v["item"].get("itemId"), + "Item Reference Type": v["item"].get("referenceType"), + "Item Name": v.get("itemName"), + "Item Type": v.get("itemType"), + "Job Type": v.get("jobType"), + "Submitted Date Time": v.get("submittedDateTime"), + "Start Date Time": v.get("startDateTime"), + "End Date Time": v.get("endDateTime"), + "Queued Duration Value": queued_duration.get("value"), + "Queued Duration Time Unit": queued_duration.get("timeUnit"), + "Running Duration Value": running_duration.get("value"), + "Running Duration Time Unit": running_duration.get("timeUnit"), + "Total Duration Value": total_duration.get("value"), + "Total Duration Time Unit": total_duration.get("timeUnit"), + "Job Instance Id": v.get("jobInstanceId"), + "Creator Item Workspace Id": v["creatorItem"].get("workspaceId"), + "Creator Item Id": v["creatorItem"].get("itemId"), + "Creator Item Reference Type": v["creatorItem"].get("referenceType"), + "Creator Item Name": v.get("creatorItemName"), + "Creator Item Type": v.get("creatorItemType"), + "Cancellation Reason": v.get("cancellationReason"), + "Capacity Id": v.get("capacityId"), + "Operation Name": v.get("operationName"), + "Runtime Version": v.get("runtimeVersion"), + "Livy Session Item Resource Uri": v.get("livySessionItemResourceUri"), + } + dfs.append(pd.DataFrame(new_data, index=[0])) + + if dfs: + df = pd.concat(dfs, ignore_index=True) + _update_dataframe_datatypes(dataframe=df, 
column_map=columns) + + return df diff --git a/src/sempy_labs/lakehouse/_shortcuts.py b/src/sempy_labs/lakehouse/_shortcuts.py index 2162ce1f..b427dafa 100644 --- a/src/sempy_labs/lakehouse/_shortcuts.py +++ b/src/sempy_labs/lakehouse/_shortcuts.py @@ -1,27 +1,32 @@ import sempy.fabric as fabric +import pandas as pd from sempy_labs._helper_functions import ( resolve_item_name_and_id, - resolve_lakehouse_name, - resolve_lakehouse_id, + resolve_lakehouse_name_and_id, resolve_workspace_name_and_id, _base_api, + _create_dataframe, + resolve_workspace_name, ) +from sempy._utils._log import log from typing import Optional import sempy_labs._icons as icons -from sempy.fabric.exceptions import FabricHTTPException from uuid import UUID +from sempy.fabric.exceptions import FabricHTTPException +@log def create_shortcut_onelake( table_name: str, source_workspace: str | UUID, - destination_lakehouse: str, + destination_lakehouse: Optional[str | UUID] = None, destination_workspace: Optional[str | UUID] = None, shortcut_name: Optional[str] = None, source_item: str | UUID = None, source_item_type: str = "Lakehouse", source_path: str = "Tables", destination_path: str = "Tables", + shortcut_conflict_policy: Optional[str] = None, **kwargs, ): """ @@ -29,14 +34,17 @@ def create_shortcut_onelake( This is a wrapper function for the following API: `OneLake Shortcuts - Create Shortcut `_. + Service Principal Authentication is supported (see `here `_ for examples). + Parameters ---------- table_name : str The table name for which a shortcut will be created. source_workspace : str | uuid.UUID The name or ID of the Fabric workspace in which the source data store exists. - destination_lakehouse : str + destination_lakehouse : str | uuid.UUID, default=None The Fabric lakehouse in which the shortcut will be created. + Defaults to None which resolves to the lakehouse attached to the notebook. destination_workspace : str | uuid.UUID, default=None The name or ID of the Fabric workspace in which the shortcut will be created. Defaults to None which resolves to the workspace of the attached lakehouse @@ -51,6 +59,8 @@ def create_shortcut_onelake( A string representing the full path to the table/file in the source lakehouse, including either "Files" or "Tables". Examples: Tables/FolderName/SubFolderName; Files/FolderName/SubFolderName. destination_path: str, default="Tables" A string representing the full path where the shortcut is created, including either "Files" or "Tables". Examples: Tables/FolderName/SubFolderName; Files/FolderName/SubFolderName. + shortcut_conflict_policy : str, default=None + When provided, it defines the action to take when a shortcut with the same name and path already exists. The default action is 'Abort'. Additional ShortcutConflictPolicy types may be added over time. 
""" if source_item is None: @@ -87,18 +97,13 @@ def create_shortcut_onelake( item=source_item, type=source_item_type, workspace=source_workspace_id ) - if destination_workspace is None: - destination_workspace_name = source_workspace_name - destination_workspace_id = source_workspace_id - else: - destination_workspace_name = destination_workspace - destination_workspace_id = fabric.resolve_workspace_id( - destination_workspace_name + (destination_workspace_name, destination_workspace_id) = ( + resolve_workspace_name_and_id(destination_workspace) + ) + (destination_lakehouse_name, destination_lakehouse_id) = ( + resolve_lakehouse_name_and_id( + lakehouse=destination_lakehouse, workspace=destination_workspace_id ) - - destination_workspace_id = fabric.resolve_workspace_id(destination_workspace) - (destination_lakehouse_name, destination_lakehouse_id) = resolve_item_name_and_id( - item=destination_lakehouse, type="Lakehouse", workspace=destination_workspace_id ) if shortcut_name is None: @@ -106,9 +111,11 @@ def create_shortcut_onelake( source_full_path = f"{source_path}/{table_name}" + actual_shortcut_name = shortcut_name.replace(" ", "") + payload = { "path": destination_path, - "name": shortcut_name.replace(" ", ""), + "name": actual_shortcut_name, "target": { "oneLake": { "workspaceId": source_workspace_id, @@ -118,15 +125,45 @@ def create_shortcut_onelake( }, } + # Check if the shortcut already exists + try: + response = _base_api( + request=f"/v1/workspaces/{destination_workspace_id}/items/{destination_lakehouse_id}/shortcuts/{destination_path}/{actual_shortcut_name}", + client="fabric_sp", + ) + response_json = response.json() + del response_json["target"]["type"] + if response_json.get("target") == payload.get("target"): + print( + f"{icons.info} The '{actual_shortcut_name}' shortcut already exists in the '{destination_lakehouse_name}' lakehouse within the '{destination_workspace_name}' workspace." + ) + return + else: + raise ValueError( + f"{icons.red_dot} The '{actual_shortcut_name}' shortcut already exists in the '{destination_lakehouse_name} lakehouse within the '{destination_workspace_name}' workspace but has a different source." + ) + except FabricHTTPException: + pass + + url = f"/v1/workspaces/{destination_workspace_id}/items/{destination_lakehouse_id}/shortcuts" + + if shortcut_conflict_policy: + if shortcut_conflict_policy not in ["Abort", "GenerateUniqueName"]: + raise ValueError( + f"{icons.red_dot} The 'shortcut_conflict_policy' parameter must be either 'Abort' or 'GenerateUniqueName'." + ) + url += f"?shortcutConflictPolicy={shortcut_conflict_policy}" + _base_api( - request=f"/v1/workspaces/{destination_workspace_id}/items/{destination_lakehouse_id}/shortcuts", + request=url, payload=payload, status_codes=201, method="post", + client="fabric_sp", ) print( - f"{icons.green_dot} The shortcut '{shortcut_name}' was created in the '{destination_lakehouse_name}' lakehouse within the '{destination_workspace_name} workspace. It is based on the '{table_name}' table in the '{source_item_name}' {source_item_type} within the '{source_workspace_name}' workspace." + f"{icons.green_dot} The shortcut '{shortcut_name}' was created in the '{destination_lakehouse_name}' lakehouse within the '{destination_workspace_name}' workspace. It is based on the '{table_name}' table in the '{source_item_name}' {source_item_type} within the '{source_workspace_name}' workspace." 
) @@ -168,17 +205,14 @@ def create_shortcut( sourceTitle = source_titles[source] - (workspace, workspace_id) = resolve_workspace_name_and_id(workspace) - - if lakehouse is None: - lakehouse_id = fabric.get_lakehouse_id() - else: - lakehouse_id = resolve_lakehouse_id(lakehouse, workspace) + (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) + (lakehouse_name, lakehouse_id) = resolve_lakehouse_name_and_id( + lakehouse=lakehouse, workspace=workspace_id + ) - client = fabric.FabricRestClient() shortcutActualName = shortcut_name.replace(" ", "") - request_body = { + payload = { "path": "Tables", "name": shortcutActualName, "target": { @@ -190,22 +224,16 @@ def create_shortcut( }, } - try: - response = client.post( - f"/v1/workspaces/{workspace_id}/items/{lakehouse_id}/shortcuts", - json=request_body, - ) - if response.status_code == 201: - print( - f"{icons.green_dot} The shortcut '{shortcutActualName}' was created in the '{lakehouse}' lakehouse within" - f" the '{workspace} workspace. It is based on the '{subpath}' table in '{sourceTitle}'." - ) - else: - print(response.status_code) - except Exception as e: - raise ValueError( - f"{icons.red_dot} Failed to create a shortcut for the '{shortcut_name}' table." - ) from e + _base_api( + request=f"/v1/workspaces/{workspace_id}/items/{lakehouse_id}/shortcuts", + method="post", + payload=payload, + status_codes=201, + ) + print( + f"{icons.green_dot} The shortcut '{shortcutActualName}' was created in the '{lakehouse_name}' lakehouse within" + f" the '{workspace_name}' workspace. It is based on the '{subpath}' table in '{sourceTitle}'." + ) def delete_shortcut( @@ -219,13 +247,15 @@ def delete_shortcut( This is a wrapper function for the following API: `OneLake Shortcuts - Delete Shortcut `_. + Service Principal Authentication is supported (see `here `_ for examples). + Parameters ---------- shortcut_name : str The name of the shortcut. shortcut_path : str = "Tables" The path of the shortcut to be deleted. Must start with either "Files" or "Tables". Examples: Tables/FolderName/SubFolderName; Files/FolderName/SubFolderName. - lakehouse : str, default=None + lakehouse : str | uuid.UUID, default=None The Fabric lakehouse name in which the shortcut resides. Defaults to None which resolves to the lakehouse attached to the notebook. workspace : str | UUID, default=None @@ -235,20 +265,16 @@ def delete_shortcut( """ (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) + (lakehouse_name, lakehouse_id) = resolve_lakehouse_name_and_id( + lakehouse=lakehouse, workspace=workspace_id + ) - if lakehouse is None: - lakehouse_id = fabric.get_lakehouse_id() - lakehouse = resolve_lakehouse_name(lakehouse_id, workspace_id) - else: - lakehouse_id = resolve_lakehouse_id(lakehouse, workspace_id) - - client = fabric.FabricRestClient() - response = client.delete( - f"/v1/workspaces/{workspace_id}/items/{lakehouse_id}/shortcuts/{shortcut_path}/{shortcut_name}" + _base_api( + request=f"/v1/workspaces/{workspace_id}/items/{lakehouse_id}/shortcuts/{shortcut_path}/{shortcut_name}", + method="delete", + client="fabric_sp", ) - if response.status_code != 200: - raise FabricHTTPException(response) print( f"{icons.green_dot} The '{shortcut_name}' shortcut in the '{lakehouse}' within the '{workspace_name}' workspace has been deleted." ) @@ -280,3 +306,135 @@ def reset_shortcut_cache(workspace: Optional[str | UUID] = None): print( f"{icons.green_dot} The shortcut cache has been reset for the '{workspace_name}' workspace." 
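Illustrative usage of the reworked shortcut helpers above; this sketch is not part of the patch, the workspace, item, and table names are placeholders, and it assumes both functions remain exported from sempy_labs.lakehouse.

from sempy_labs.lakehouse import create_shortcut_onelake, delete_shortcut

# Shortcut a Warehouse table into a lakehouse; generate a unique name if one already exists.
create_shortcut_onelake(
    table_name="DimCustomer",                   # placeholder table
    source_item="SalesWarehouse",               # placeholder source item (name or ID)
    source_item_type="Warehouse",
    source_workspace="Sales Workspace",         # placeholder workspace
    destination_lakehouse="AnalyticsLakehouse", # placeholder lakehouse
    destination_workspace="Sales Workspace",
    shortcut_conflict_policy="GenerateUniqueName",
)

# Remove it again; shortcut_path defaults to "Tables".
delete_shortcut(
    shortcut_name="DimCustomer",
    lakehouse="AnalyticsLakehouse",
    workspace="Sales Workspace",
)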
     )
+
+
+@log
+def list_shortcuts(
+    lakehouse: Optional[str | UUID] = None,
+    workspace: Optional[str | UUID] = None,
+    path: Optional[str] = None,
+) -> pd.DataFrame:
+    """
+    Shows all shortcuts which exist in a Fabric lakehouse and their properties.
+
+    Parameters
+    ----------
+    lakehouse : str | uuid.UUID, default=None
+        The Fabric lakehouse name or ID.
+        Defaults to None which resolves to the lakehouse attached to the notebook.
+    workspace : str | uuid.UUID, default=None
+        The name or ID of the Fabric workspace in which the lakehouse resides.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    path : str, default=None
+        The path within the lakehouse in which to look for shortcuts. If provided, must start with either "Files" or "Tables". Examples: Tables/FolderName/SubFolderName; Files/FolderName/SubFolderName.
+        Defaults to None which will return all shortcuts on the given lakehouse.
+
+    Returns
+    -------
+    pandas.DataFrame
+        A pandas dataframe showing all the shortcuts which exist in the specified lakehouse.
+    """
+
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+    (lakehouse_name, lakehouse_id) = resolve_lakehouse_name_and_id(
+        lakehouse=lakehouse, workspace=workspace_id
+    )
+
+    columns = {
+        "Shortcut Name": "string",
+        "Shortcut Path": "string",
+        "Source Type": "string",
+        "Source Workspace Id": "string",
+        "Source Workspace Name": "string",
+        "Source Item Id": "string",
+        "Source Item Name": "string",
+        "Source Item Type": "string",
+        "OneLake Path": "string",
+        "Connection Id": "string",
+        "Location": "string",
+        "Bucket": "string",
+        "SubPath": "string",
+        "Source Properties Raw": "string",
+    }
+    df = _create_dataframe(columns=columns)
+
+    # To improve performance, create a dataframe to cache all items for a given workspace
+    itm_clms = {
+        "Id": "string",
+        "Display Name": "string",
+        "Description": "string",
+        "Type": "string",
+        "Workspace Id": "string",
+    }
+    source_items_df = _create_dataframe(columns=itm_clms)
+
+    url = f"/v1/workspaces/{workspace_id}/items/{lakehouse_id}/shortcuts"
+
+    if path is not None:
+        url += f"?parentPath={path}"
+
+    responses = _base_api(
+        request=url,
+        uses_pagination=True,
+    )
+
+    sources = {
+        "AdlsGen2": "adlsGen2",
+        "AmazonS3": "amazonS3",
+        "Dataverse": "dataverse",
+        "ExternalDataShare": "externalDataShare",
+        "GoogleCloudStorage": "googleCloudStorage",
+        "OneLake": "oneLake",
+        "S3Compatible": "s3Compatible",
+    }
+
+    for r in responses:
+        for i in r.get("value", []):
+            tgt = i.get("target", {})
+            tgt_type = tgt.get("type")
+            connection_id = tgt.get(sources.get(tgt_type), {}).get("connectionId")
+            location = tgt.get(sources.get(tgt_type), {}).get("location")
+            sub_path = tgt.get(sources.get(tgt_type), {}).get("subpath")
+            source_workspace_id = tgt.get(sources.get(tgt_type), {}).get("workspaceId")
+            source_item_id = tgt.get(sources.get(tgt_type), {}).get("itemId")
+            bucket = tgt.get(sources.get(tgt_type), {}).get("bucket")
+            source_workspace_name = (
+                resolve_workspace_name(workspace_id=source_workspace_id)
+                if source_workspace_id is not None
+                else None
+            )
+            # Cache the source workspace's items and use the cache to get the item type and name
+            source_item_type = None
+            source_item_name = None
+            dfI = source_items_df[
+                source_items_df["Workspace Id"] == source_workspace_id
+            ]
+            if dfI.empty:
+                dfI = fabric.list_items(workspace=source_workspace_id)
+                source_items_df = pd.concat([source_items_df, dfI], ignore_index=True)
+
+            dfI_filt = dfI[dfI["Id"] == source_item_id]
+            if not dfI_filt.empty:
+                source_item_type = dfI_filt["Type"].iloc[0]
+                source_item_name = dfI_filt["Display Name"].iloc[0]
+
+            new_data = {
+                "Shortcut Name": i.get("name"),
+                "Shortcut Path": i.get("path"),
+                "Source Type": tgt_type,
+                "Source Workspace Id": source_workspace_id,
+                "Source Workspace Name": source_workspace_name,
+                "Source Item Id": source_item_id,
+                "Source Item Name": source_item_name,
+                "Source Item Type": source_item_type,
+                "OneLake Path": tgt.get(sources.get("oneLake"), {}).get("path"),
+                "Connection Id": connection_id,
+                "Location": location,
+                "Bucket": bucket,
+                "SubPath": sub_path,
+                "Source Properties Raw": str(tgt),
+            }
+            df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True)
+
+    return df
diff --git a/src/sempy_labs/migration/_direct_lake_to_import.py b/src/sempy_labs/migration/_direct_lake_to_import.py
index 267b9168..b366a506 100644
--- a/src/sempy_labs/migration/_direct_lake_to_import.py
+++ b/src/sempy_labs/migration/_direct_lake_to_import.py
@@ -1,11 +1,16 @@
 import sempy
 from uuid import UUID
 import sempy_labs._icons as icons
+from typing import Optional
 
 
-def migrate_direct_lake_to_import(dataset: str | UUID, workspace: str | UUID):
+def migrate_direct_lake_to_import(
+    dataset: str | UUID,
+    workspace: Optional[str | UUID] = None,
+    mode: str = "import",
+):
     """
-    Migrates a semantic model from Direct Lake mode to import mode. After running this function, you must go to the semantic model settings and update the cloud connection. Not doing so will result in an inablity to refresh/use the semantic model.
+    Migrates a semantic model from Direct Lake mode to Import or DirectQuery mode. After running this function, you must go to the semantic model settings and update the cloud connection. Not doing so will result in an inability to refresh/use the semantic model.
 
     Parameters
     ----------
@@ -15,12 +20,29 @@ def migrate_direct_lake_to_import(dataset: str | UUID, workspace: str | UUID):
         The Fabric workspace name or ID.
         Defaults to None which resolves to the workspace of the attached lakehouse
         or if no lakehouse attached, resolves to the workspace of the notebook.
+    mode : str, default="import"
+        The mode to migrate to. Can be either "import" or "directquery".
     """
 
     sempy.fabric._client._utils._init_analysis_services()
     import Microsoft.AnalysisServices.Tabular as TOM
     from sempy_labs.tom import connect_semantic_model
 
+    modes = {
+        "import": "Import",
+        "directquery": "DirectQuery",
+        "dq": "DirectQuery",
+    }
+
+    # Resolve mode
+    mode = mode.lower()
+    actual_mode = modes.get(mode)
+    if actual_mode is None:
+        raise ValueError(f"Invalid mode '{mode}'. 
Must be one of {list(modes.keys())}.") + + # if isinstance(tables, str): + # tables = [tables] + with connect_semantic_model( dataset=dataset, workspace=workspace, readonly=False ) as tom: @@ -31,7 +53,14 @@ def migrate_direct_lake_to_import(dataset: str | UUID, workspace: str | UUID): ) return - for t in tom.model.Tables: + # if tables is None: + table_list = [t for t in tom.model.Tables] + # else: + # table_list = [t for t in tom.model.Tables if t.Name in tables] + # if not table_list: + # raise ValueError(f"{icons.red_dot} No tables found to migrate.") + + for t in table_list: table_name = t.Name if t.Partitions.Count == 1 and all( p.Mode == TOM.ModeType.DirectLake for p in t.Partitions @@ -51,16 +80,24 @@ def migrate_direct_lake_to_import(dataset: str | UUID, workspace: str | UUID): table_name=table_name, partition_name=partition_name, expression=expression, - mode="Import", + mode=actual_mode, ) # Remove Direct Lake partition tom.remove_object(object=p) + # if tables is not None: + # print( + # f"{icons.green_dot} The '{table_name}' table has been migrated to '{actual_mode}' mode." + # ) tom.model.Model.DefaultMode = TOM.ModeType.Import + # if tables is None: + print( + f"{icons.green_dot} All tables which were in Direct Lake mode have been migrated to '{actual_mode}' mode." + ) - # Check - # for t in tom.model.Tables: - # if t.Partitions.Count == 1 and all(p.Mode == TOM.ModeType.Import for p in t.Partitions) and t.CalculationGroup is None: - # p = next(p for p in t.Partitions) - # print(p.Name) - # print(p.Source.Expression) + # Check + # for t in tom.model.Tables: + # if t.Partitions.Count == 1 and all(p.Mode == TOM.ModeType.Import for p in t.Partitions) and t.CalculationGroup is None: + # p = next(p for p in t.Partitions) + # print(p.Name) + # print(p.Source.Expression) diff --git a/src/sempy_labs/migration/_migrate_calctables_to_lakehouse.py b/src/sempy_labs/migration/_migrate_calctables_to_lakehouse.py index 9abc9547..36a2293a 100644 --- a/src/sempy_labs/migration/_migrate_calctables_to_lakehouse.py +++ b/src/sempy_labs/migration/_migrate_calctables_to_lakehouse.py @@ -6,10 +6,9 @@ from sempy_labs._helper_functions import ( resolve_lakehouse_name, resolve_lakehouse_id, - create_abfss_path, retry, generate_guid, - _create_spark_session, + save_as_delta_table, ) from sempy_labs.tom import connect_semantic_model from typing import Optional @@ -98,8 +97,6 @@ def migrate_calc_tables_to_lakehouse( if killFunction: return - spark = _create_spark_session() - if len(dfP_filt) == 0: print( f"{icons.yellow_dot} The '{dataset}' semantic model in the '{workspace}' workspace has no calculated tables." 
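A minimal sketch of the new mode parameter; the dataset and workspace names are placeholders, and it assumes the function stays exported from sempy_labs.migration like the other migration helpers.

from sempy_labs.migration import migrate_direct_lake_to_import

# Switch every Direct Lake partition in the model to DirectQuery ("dq" is accepted as shorthand).
migrate_direct_lake_to_import(
    dataset="Sales Model",        # placeholder semantic model
    workspace="Sales Workspace",  # placeholder workspace
    mode="directquery",           # "import" (default) or "directquery"
)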
@@ -198,14 +195,12 @@ def migrate_calc_tables_to_lakehouse( delta_table_name = t.Name.replace(" ", "_").lower() - spark_df = spark.createDataFrame(df) - filePath = create_abfss_path( - lakehouse_id=lakehouse_id, - lakehouse_workspace_id=lakehouse_workspace_id, - delta_table_name=delta_table_name, - ) - spark_df.write.mode("overwrite").format("delta").save( - filePath + save_as_delta_table( + dataframe=df, + table_name=delta_table_name, + lakehouse=lakehouse, + workspace=lakehouse_workspace, + write_mode="overwrite", ) @retry( diff --git a/src/sempy_labs/migration/_migration_validation.py b/src/sempy_labs/migration/_migration_validation.py index 99b51666..96c6b121 100644 --- a/src/sempy_labs/migration/_migration_validation.py +++ b/src/sempy_labs/migration/_migration_validation.py @@ -42,10 +42,6 @@ def migration_validation( f"{icons.red_dot} The 'dataset' and 'new_dataset' parameters are both set to '{dataset}'. These parameters must be set to different values." ) - workspace = fabric.resolve_workspace_name(workspace) - if new_dataset_workspace is None: - new_dataset_workspace = workspace - icons.sll_tags.append("DirectLakeMigration") dfA = list_semantic_model_objects(dataset=dataset, workspace=workspace) diff --git a/src/sempy_labs/migration/_refresh_calc_tables.py b/src/sempy_labs/migration/_refresh_calc_tables.py index cdb3a151..e8b0fc76 100644 --- a/src/sempy_labs/migration/_refresh_calc_tables.py +++ b/src/sempy_labs/migration/_refresh_calc_tables.py @@ -1,7 +1,6 @@ import sempy.fabric as fabric import pandas as pd import re -from sempy_labs._helper_functions import retry from sempy_labs.tom import connect_semantic_model from typing import Optional from sempy._utils._log import log @@ -10,7 +9,8 @@ from sempy_labs._helper_functions import ( resolve_workspace_name_and_id, resolve_dataset_name_and_id, - _create_spark_session, + save_as_delta_table, + retry, ) @@ -29,7 +29,6 @@ def refresh_calc_tables(dataset: str | UUID, workspace: Optional[str | UUID] = N or if no lakehouse attached, resolves to the workspace of the notebook. """ - spark = _create_spark_session() (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) (dataset_name, dataset_id) = resolve_dataset_name_and_id(dataset, workspace_id) icons.sll_tags.append("DirectLakeMigration") @@ -117,10 +116,12 @@ def dyn_connect(): f"{icons.in_progress} Refresh of the '{delta_table_name}' table within the lakehouse is in progress..." ) - spark_df = spark.createDataFrame(df) - spark_df.write.mode("overwrite").format("delta").saveAsTable( - delta_table_name + save_as_delta_table( + dataframe=df, + table_name=delta_table_name, + write_mode="overwrite", ) + print( f"{icons.green_dot} Calculated table '{tName}' has been refreshed as the '{delta_table_name.lower()}' table in the lakehouse." 
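The two hunks above swap hand-rolled Spark writes for the shared save_as_delta_table helper. As a standalone sketch of that call pattern (placeholder table name, and assuming the helper stays exported at the package root):

import pandas as pd
from sempy_labs import save_as_delta_table

df = pd.DataFrame({"id": [1, 2, 3], "value": ["a", "b", "c"]})

# Writes to the lakehouse attached to the notebook when lakehouse/workspace are not specified.
save_as_delta_table(
    dataframe=df,
    table_name="sample_table",   # placeholder delta table name
    write_mode="overwrite",
)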
) diff --git a/src/sempy_labs/mirrored_azure_databricks_catalog/__init__.py b/src/sempy_labs/mirrored_azure_databricks_catalog/__init__.py new file mode 100644 index 00000000..dc7c0126 --- /dev/null +++ b/src/sempy_labs/mirrored_azure_databricks_catalog/__init__.py @@ -0,0 +1,15 @@ +from sempy_labs.mirrored_azure_databricks_catalog._refresh_catalog_metadata import ( + refresh_catalog_metadata, +) +from sempy_labs.mirrored_azure_databricks_catalog._discover import ( + discover_catalogs, + discover_schemas, + discover_tables, +) + +__all__ = [ + "refresh_catalog_metadata", + "discover_catalogs", + "discover_schemas", + "discover_tables", +] diff --git a/src/sempy_labs/mirrored_azure_databricks_catalog/_discover.py b/src/sempy_labs/mirrored_azure_databricks_catalog/_discover.py new file mode 100644 index 00000000..8404eb73 --- /dev/null +++ b/src/sempy_labs/mirrored_azure_databricks_catalog/_discover.py @@ -0,0 +1,209 @@ +from uuid import UUID +from typing import Optional +from sempy_labs._helper_functions import ( + resolve_workspace_id, + _base_api, + _create_dataframe, +) +import pandas as pd + + +def discover_catalogs( + databricks_workspace_connection_id: UUID, + workspace: Optional[str | UUID] = None, + max_results: Optional[int] = None, +) -> pd.DataFrame: + """ + Returns a list of catalogs from Unity Catalog. + + This is a wrapper function for the following API: `Databricks Metadata Discovery - Discover Catalogs `_. + + Parameters + ---------- + databricks_workspace_connection_id : uuid.UUID + The ID of the Databricks workspace connection. + workspace : str | uuid.UUID, default=None + The workspace name or ID. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + max_results : int, default=None + The maximum number of results to return. If not specified, all results are returned. + + Returns + ------- + pandas.DataFrame + A pandas dataframe showing a list of catalogs from Unity Catalog. + """ + + workspace_id = resolve_workspace_id(workspace) + + url = f"/v1/workspaces/{workspace_id}/azuredatabricks/catalogs?databricksWorkspaceConnectionId={databricks_workspace_connection_id}" + if max_results: + url += f"&maxResults={max_results}" + + responses = _base_api(request=url, uses_pagination=True) + + columns = { + "Catalog Name": "str", + "Catalog Full Name": "str", + "Catalog Type": "str", + "Storage Location": "str", + } + + df = _create_dataframe(columns=columns) + + dfs = [] + for r in responses: + for i in r.get("value", []): + new_data = { + "Catalog Name": i.get("name"), + "Catalog Full Name": i.get("fullName"), + "Catalog Type": i.get("catalogType"), + "Storage Location": i.get("storageLocation"), + } + + dfs.append(pd.DataFrame(new_data, index=[0])) + + if dfs: + df = pd.concat(dfs, ignore_index=True) + + return df + + +def discover_schemas( + catalog: str, + databricks_workspace_connection_id: UUID, + workspace: Optional[str | UUID] = None, + max_results: Optional[int] = None, +) -> pd.DataFrame: + """ + Returns a list of schemas in the given catalog from Unity Catalog. + + This is a wrapper function for the following API: `Databricks Metadata Discovery - Discover Schemas `_. + + Parameters + ---------- + catalog : str + The name of the catalog. + databricks_workspace_connection_id : uuid.UUID + The ID of the Databricks workspace connection. + workspace : str | uuid.UUID, default=None + The workspace name or ID. 
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    max_results : int, default=None
+        The maximum number of results to return. If not specified, all results are returned.
+
+    Returns
+    -------
+    pandas.DataFrame
+        A pandas dataframe showing a list of schemas in the given catalog from Unity Catalog.
+    """
+
+    workspace_id = resolve_workspace_id(workspace)
+
+    url = f"/v1/workspaces/{workspace_id}/azuredatabricks/catalogs/{catalog}/schemas?databricksWorkspaceConnectionId={databricks_workspace_connection_id}"
+    if max_results:
+        url += f"&maxResults={max_results}"
+
+    responses = _base_api(request=url, uses_pagination=True)
+
+    columns = {
+        "Catalog Name": "str",
+        "Schema Name": "str",
+        "Schema Full Name": "str",
+        "Storage Location": "str",
+    }
+
+    df = _create_dataframe(columns=columns)
+
+    dfs = []
+    for r in responses:
+        for i in r.get("value", []):
+            new_data = {
+                "Catalog Name": catalog,
+                "Schema Name": i.get("name"),
+                "Schema Full Name": i.get("fullName"),
+                "Storage Location": i.get("storageLocation"),
+            }
+
+            dfs.append(pd.DataFrame(new_data, index=[0]))
+
+    if dfs:
+        df = pd.concat(dfs, ignore_index=True)
+
+    return df
+
+
+def discover_tables(
+    catalog: str,
+    schema: str,
+    databricks_workspace_connection_id: UUID,
+    workspace: Optional[str | UUID] = None,
+    max_results: Optional[int] = None,
+) -> pd.DataFrame:
+    """
+    Returns a list of tables in the given schema of a catalog from Unity Catalog.
+
+    This is a wrapper function for the following API: `Databricks Metadata Discovery - Discover Tables `_.
+
+    Parameters
+    ----------
+    catalog : str
+        The name of the catalog.
+    schema : str
+        The name of the schema.
+    databricks_workspace_connection_id : uuid.UUID
+        The ID of the Databricks workspace connection.
+    workspace : str | uuid.UUID, default=None
+        The workspace name or ID.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    max_results : int, default=None
+        The maximum number of results to return. If not specified, all results are returned.
+
+    Returns
+    -------
+    pandas.DataFrame
+        A pandas dataframe showing a list of tables in the given schema from Unity Catalog.
+    """
+
+    workspace_id = resolve_workspace_id(workspace)
+
+    url = f"/v1/workspaces/{workspace_id}/azuredatabricks/catalogs/{catalog}/schemas/{schema}/tables?databricksWorkspaceConnectionId={databricks_workspace_connection_id}"
+    if max_results:
+        url += f"&maxResults={max_results}"
+
+    responses = _base_api(request=url, uses_pagination=True)
+
+    columns = {
+        "Catalog Name": "str",
+        "Schema Name": "str",
+        "Table Name": "str",
+        "Table Full Name": "str",
+        "Storage Location": "str",
+        "Table Type": "str",
+        "Data Source Format": "str",
+    }
+
+    df = _create_dataframe(columns=columns)
+
+    dfs = []
+    for r in responses:
+        for i in r.get("value", []):
+            new_data = {
+                "Catalog Name": catalog,
+                "Schema Name": schema,
+                "Table Name": i.get("name"),
+                "Table Full Name": i.get("fullName"),
+                "Storage Location": i.get("storageLocation"),
+                "Table Type": i.get("tableType"),
+                "Data Source Format": i.get("dataSourceFormat"),
+            }
+
+            dfs.append(pd.DataFrame(new_data, index=[0]))
+
+    if dfs:
+        df = pd.concat(dfs, ignore_index=True)
+
+    return df
diff --git a/src/sempy_labs/mirrored_azure_databricks_catalog/_refresh_catalog_metadata.py b/src/sempy_labs/mirrored_azure_databricks_catalog/_refresh_catalog_metadata.py
new file mode 100644
index 00000000..fc58ee89
--- /dev/null
+++ b/src/sempy_labs/mirrored_azure_databricks_catalog/_refresh_catalog_metadata.py
@@ -0,0 +1,43 @@
+from uuid import UUID
+from typing import Optional
+from sempy_labs._helper_functions import (
+    resolve_workspace_name_and_id,
+    resolve_item_name_and_id,
+    _base_api,
+)
+import sempy_labs._icons as icons
+
+
+def refresh_catalog_metadata(
+    mirrored_azure_databricks_catalog: str | UUID,
+    workspace: Optional[str | UUID] = None,
+):
+    """
+    Refresh Databricks catalog metadata in a mirroredAzureDatabricksCatalogs item.
+
+    This is a wrapper function for the following API: `Refresh Metadata - Items RefreshCatalogMetadata `_.
+
+    Parameters
+    ----------
+    mirrored_azure_databricks_catalog : str | uuid.UUID
+        The name or ID of the mirrored Azure Databricks catalog.
+    workspace : str | uuid.UUID, default=None
+        The workspace name or ID.
+        Defaults to None which resolves to the workspace of the attached lakehouse
+        or if no lakehouse attached, resolves to the workspace of the notebook.
+    """
+
+    (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)
+    (catalog_name, catalog_id) = resolve_item_name_and_id(
+        mirrored_azure_databricks_catalog
+    )
+
+    _base_api(
+        request=f"/v1/workspaces/{workspace_id}/mirroredAzureDatabricksCatalogs/{catalog_id}/refreshCatalogMetadata",
+        method="post",
+        lro_return_status_code=True,
+    )
+
+    print(
+        f"{icons.green_dot} The '{catalog_name}' Databricks Catalog metadata within the '{workspace_name}' workspace has been refreshed."
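An illustrative walk through the new mirrored Azure Databricks catalog helpers; the connection ID, catalog, schema, and item names below are placeholders.

from sempy_labs.mirrored_azure_databricks_catalog import (
    discover_catalogs,
    discover_schemas,
    discover_tables,
    refresh_catalog_metadata,
)

conn_id = "00000000-0000-0000-0000-000000000000"  # placeholder Databricks workspace connection ID

catalogs = discover_catalogs(databricks_workspace_connection_id=conn_id)
schemas = discover_schemas(catalog="main", databricks_workspace_connection_id=conn_id)
tables = discover_tables(
    catalog="main", schema="default", databricks_workspace_connection_id=conn_id
)

# Refresh the metadata of an existing mirrored catalog item (placeholder item name).
refresh_catalog_metadata(mirrored_azure_databricks_catalog="main_mirror")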
+ ) diff --git a/src/sempy_labs/report/__init__.py b/src/sempy_labs/report/__init__.py index a5e28a97..17ac1942 100644 --- a/src/sempy_labs/report/__init__.py +++ b/src/sempy_labs/report/__init__.py @@ -1,5 +1,9 @@ +from sempy_labs.report._save_report import ( + save_report_as_pbip, +) from sempy_labs.report._reportwrapper import ( ReportWrapper, + connect_report, ) from sempy_labs.report._paginated import ( get_report_datasources, @@ -46,4 +50,6 @@ "run_report_bpa", "get_report_datasources", "download_report", + "save_report_as_pbip", + "connect_report", ] diff --git a/src/sempy_labs/report/_download_report.py b/src/sempy_labs/report/_download_report.py index 0bf68317..d4e13c54 100644 --- a/src/sempy_labs/report/_download_report.py +++ b/src/sempy_labs/report/_download_report.py @@ -3,9 +3,11 @@ from typing import Optional from sempy_labs._helper_functions import ( resolve_workspace_name_and_id, - resolve_lakehouse_name, + resolve_lakehouse_name_and_id, _base_api, resolve_item_id, + _mount, + resolve_workspace_name, ) from sempy_labs.lakehouse._lakehouse import lakehouse_attached from uuid import UUID @@ -20,7 +22,7 @@ def download_report( """ Downloads the specified report from the specified workspace to a Power BI .pbix file. - This is a wrapper function for the following API: `Reports - Export Report In Group `. + This is a wrapper function for the following API: `Reports - Export Report In Group `_. Parameters ---------- @@ -43,11 +45,8 @@ def download_report( ) (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) - lakehouse_id = fabric.get_lakehouse_id() - lakehouse_workspace = fabric.resolve_workspace_name() - lakehouse_name = resolve_lakehouse_name( - lakehouse_id=lakehouse_id, workspace=lakehouse_workspace - ) + (lakehouse_name, lakehouse_id) = resolve_lakehouse_name_and_id() + lakehouse_workspace = resolve_workspace_name() download_types = ["LiveConnect", "IncludeModel"] if download_type not in download_types: @@ -63,7 +62,9 @@ def download_report( ) # Save file to the attached lakehouse - with open(f"/lakehouse/default/Files/{file_name}.pbix", "wb") as file: + local_path = _mount() + save_file = f"{local_path}/Files/{file_name}.pbix" + with open(save_file, "wb") as file: file.write(response.content) print( diff --git a/src/sempy_labs/report/_export_report.py b/src/sempy_labs/report/_export_report.py index b8c0f86d..ba276d94 100644 --- a/src/sempy_labs/report/_export_report.py +++ b/src/sempy_labs/report/_export_report.py @@ -5,7 +5,6 @@ from sempy_labs._helper_functions import ( generate_embedded_filter, resolve_workspace_name_and_id, - resolve_lakehouse_name_and_id, _base_api, _mount, ) diff --git a/src/sempy_labs/report/_generate_report.py b/src/sempy_labs/report/_generate_report.py index e30f4c24..783cc88f 100644 --- a/src/sempy_labs/report/_generate_report.py +++ b/src/sempy_labs/report/_generate_report.py @@ -11,6 +11,7 @@ _update_dataframe_datatypes, _base_api, resolve_item_id, + get_item_definition, ) import sempy_labs._icons as icons from sempy._utils._log import log @@ -177,8 +178,11 @@ def update_report_from_reportjson( ) +@log def get_report_definition( - report: str, workspace: Optional[str | UUID] = None, return_dataframe: bool = True + report: str | UUID, + workspace: Optional[str | UUID] = None, + return_dataframe: bool = True, ) -> pd.DataFrame | dict: """ Gets the collection of definition files of a report. @@ -187,8 +191,8 @@ def get_report_definition( Parameters ---------- - report : str - Name of the report. 
+ report : str | uuid.UUID + Name or ID of the report. workspace : str | uuid.UUID, default=None The Fabric workspace name or ID in which the report resides. Defaults to None which resolves to the workspace of the attached lakehouse @@ -198,25 +202,17 @@ def get_report_definition( Returns ------- - pandas.DataFrame | dict + pandas.DataFrame The collection of report definition files within a pandas dataframe. """ - (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) - report_id = resolve_item_id(item=report, type="Report", workspace=workspace) - - result = _base_api( - request=f"/v1/workspaces/{workspace_id}/reports/{report_id}/getDefinition", - method="post", - lro_return_json=True, - status_codes=None, + return get_item_definition( + item=report, + type="Report", + workspace=workspace, + return_dataframe=return_dataframe, ) - if return_dataframe: - return pd.json_normalize(result["definition"]["parts"]) - else: - return result - @log def create_model_bpa_report( @@ -327,9 +323,9 @@ def _create_report( from sempy_labs.report import report_rebind - report_workspace = fabric.resolve_workspace_name(report_workspace) - report_workspace_id = fabric.resolve_workspace_id(report_workspace) - dataset_workspace = fabric.resolve_workspace_name(dataset_workspace) + (report_workspace_name, report_workspace_id) = resolve_workspace_name_and_id( + workspace=report_workspace + ) dfR = fabric.list_reports(workspace=report_workspace) dfR_filt = dfR[dfR["Name"] == report] @@ -346,7 +342,7 @@ def _create_report( ) print( - f"{icons.green_dot} The '{report}' report has been created within the '{report_workspace}'" + f"{icons.green_dot} The '{report}' report has been created within the '{report_workspace_name}'" ) updated_report = True # Update the report if it exists @@ -360,12 +356,12 @@ def _create_report( status_codes=None, ) print( - f"{icons.green_dot} The '{report}' report has been updated within the '{report_workspace}'" + f"{icons.green_dot} The '{report}' report has been updated within the '{report_workspace_name}'" ) updated_report = True else: raise ValueError( - f"{icons.red_dot} The '{report}' report within the '{report_workspace}' workspace already exists and the 'overwrite' parameter was set to False." + f"{icons.red_dot} The '{report}' report within the '{report_workspace_name}' workspace already exists and the 'overwrite' parameter was set to False." ) # Rebind the report to the semantic model to make sure it is pointed at the correct semantic model diff --git a/src/sempy_labs/report/_report_bpa.py b/src/sempy_labs/report/_report_bpa.py index 6219dd7e..5ab25e58 100644 --- a/src/sempy_labs/report/_report_bpa.py +++ b/src/sempy_labs/report/_report_bpa.py @@ -1,4 +1,3 @@ -import sempy.fabric as fabric from typing import Optional import pandas as pd import datetime @@ -7,8 +6,7 @@ from sempy_labs._helper_functions import ( format_dax_object_name, save_as_delta_table, - resolve_report_id, - resolve_lakehouse_name, + resolve_item_name_and_id, resolve_workspace_capacity, _get_column_aggregate, resolve_workspace_name_and_id, @@ -54,9 +52,7 @@ def run_report_bpa( A pandas dataframe in HTML format showing report objects which violated the best practice analyzer rules. 
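With get_report_definition now delegating to the shared get_item_definition helper, a usage sketch (report and workspace names are placeholders):

from sempy_labs.report import get_report_definition

# Definition parts as a dataframe (one row per definition file).
parts_df = get_report_definition(report="Sales Report", workspace="Sales Workspace")

# Or the raw definition dictionary returned by the API.
definition = get_report_definition(
    report="Sales Report", workspace="Sales Workspace", return_dataframe=False
)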
""" - (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) - - rpt = ReportWrapper(report=report, workspace=workspace_id) + rpt = ReportWrapper(report=report, workspace=workspace) dfCV = rpt.list_custom_visuals() dfP = rpt.list_pages() @@ -149,7 +145,7 @@ def execute_rule(row): df_output["Description"] = row["Description"] df_output["URL"] = row["URL"] df_output["Report URL"] = helper.get_web_url( - report=report, workspace=workspace_id + report=report, workspace=workspace ) page_mapping_dict = dfP.set_index("Page Display Name")["Page URL"].to_dict() @@ -205,31 +201,28 @@ def execute_rule(row): now = datetime.datetime.now() delta_table_name = "reportbparesults" - lakehouse_id = fabric.get_lakehouse_id() - lake_workspace = fabric.resolve_workspace_name() - lakehouse = resolve_lakehouse_name( - lakehouse_id=lakehouse_id, workspace=lake_workspace - ) - - lakeT = get_lakehouse_tables(lakehouse=lakehouse, workspace=lake_workspace) + lakeT = get_lakehouse_tables() lakeT_filt = lakeT[lakeT["Table Name"] == delta_table_name] if len(lakeT_filt) == 0: runId = 1 else: - max_run_id = _get_column_aggregate( - lakehouse=lakehouse, table_name=delta_table_name - ) + max_run_id = _get_column_aggregate(table_name=delta_table_name) runId = max_run_id + 1 + (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) + (report_name, report_id) = resolve_item_name_and_id( + item=report, type="Report", workspace=workspace_id + ) + export_df = finalDF.copy() capacity_id, capacity_name = resolve_workspace_capacity(workspace=workspace_id) export_df["Capacity Name"] = capacity_name export_df["Capacity Id"] = capacity_id export_df["Workspace Name"] = workspace_name export_df["Workspace Id"] = workspace_id - export_df["Report Name"] = report - export_df["Report Id"] = resolve_report_id(report, workspace_id) + export_df["Report Name"] = report_name + export_df["Report Id"] = report_id export_df["RunId"] = runId export_df["Timestamp"] = now export_df["RunId"] = export_df["RunId"].astype(int) diff --git a/src/sempy_labs/report/_report_functions.py b/src/sempy_labs/report/_report_functions.py index 5f10b9ab..578ed991 100644 --- a/src/sempy_labs/report/_report_functions.py +++ b/src/sempy_labs/report/_report_functions.py @@ -10,7 +10,6 @@ from sempy_labs.lakehouse._lakehouse import lakehouse_attached from sempy_labs._helper_functions import ( resolve_report_id, - resolve_lakehouse_name, language_validate, resolve_workspace_name_and_id, _decode_b64, @@ -18,6 +17,8 @@ _update_dataframe_datatypes, _base_api, _create_spark_session, + _mount, + resolve_workspace_id, ) from typing import List, Optional, Union from sempy._utils._log import log @@ -74,18 +75,16 @@ def get_report_json( f"{icons.red_dot} In order to save the report.json file, a lakehouse must be attached to the notebook. Please attach a lakehouse to this notebook." 
) - lakehouse_id = fabric.get_lakehouse_id() - lake_workspace = fabric.resolve_workspace_name() - lakehouse = resolve_lakehouse_name(lakehouse_id, lake_workspace) - folderPath = "/lakehouse/default/Files" - fileExt = ".json" - if not save_to_file_name.endswith(fileExt): - save_to_file_name = f"{save_to_file_name}{fileExt}" - filePath = os.path.join(folderPath, save_to_file_name) - with open(filePath, "w") as json_file: + local_path = _mount() + save_folder = f"{local_path}/Files" + file_ext = ".json" + if not save_to_file_name.endswith(file_ext): + save_to_file_name = f"{save_to_file_name}{file_ext}" + file_path = os.path.join(save_folder, save_to_file_name) + with open(file_path, "w") as json_file: json.dump(report_json, json_file, indent=4) print( - f"{icons.green_dot} The report.json file for the '{report}' report has been saved to the '{lakehouse}' in this location: '{filePath}'.\n\n" + f"{icons.green_dot} The report.json file for the '{report}' report has been saved to the lakehouse attached to this notebook in this location: Files/'{save_to_file_name}'.\n\n" ) return report_json @@ -117,9 +116,9 @@ def report_dependency_tree(workspace: Optional[str | UUID] = None): dfR.rename(columns={"Name": "Report Name"}, inplace=True) dfR = dfR[["Report Name", "Dataset Name"]] - report_icon = "\U0001F4F6" - dataset_icon = "\U0001F9CA" - workspace_icon = "\U0001F465" + report_icon = "\U0001f4f6" + dataset_icon = "\U0001f9ca" + workspace_icon = "\U0001f465" node_dict = {} rootNode = Node(workspace_name) @@ -194,7 +193,7 @@ def clone_report( target_workspace = workspace_name target_workspace_id = workspace_id else: - target_workspace_id = fabric.resolve_workspace_id(target_workspace) + target_workspace_id = resolve_workspace_id(workspace=target_workspace) if target_dataset is not None: if target_dataset_workspace is None: diff --git a/src/sempy_labs/report/_report_helper.py b/src/sempy_labs/report/_report_helper.py index a3970bdb..58be85b8 100644 --- a/src/sempy_labs/report/_report_helper.py +++ b/src/sempy_labs/report/_report_helper.py @@ -1,14 +1,5 @@ -import sempy.fabric as fabric -from typing import Tuple, Optional -import sempy_labs._icons as icons -import re -import base64 -import json import requests -from uuid import UUID -from sempy_labs._helper_functions import ( - resolve_workspace_name_and_id, -) +import sempy_labs._icons as icons vis_type_mapping = { @@ -47,10 +38,9 @@ "decompositionTreeVisual": "Decomposition tree", "qnaVisual": "Q&A", "aiNarratives": "Narrative", - "scorecard": "Metrics (Preview)", + "scorecard": "Goals (Preview)", "rdlVisual": "Paginated report", "cardVisual": "Card (new)", - "advancedSlicerVisual": "Slicer (new)", "actionButton": "Button", "bookmarkNavigator": "Bookmark navigator", "image": "Image", @@ -58,8 +48,33 @@ "pageNavigator": "Page navigator", "shape": "Shape", "Group": "Group", + "listSlicer": "List Slicer", + "advancedSlicerVisual": "Button Slicer", + "FlowVisual_C29F1DCC_81F5_4973_94AD_0517D44CC06A": "Power Automate for Power BI", } + +def generate_visual_file_path(page_file_path: str, visual_id: str) -> str: + + return page_file_path.split("/page.json")[0] + f"/visuals/{visual_id}.json" + + +def resolve_visual_type(visual_type: str) -> str: + vt_lower = visual_type.lower() + + vis_map_lower = {k.lower(): v for k, v in vis_type_mapping.items()} + flipped_lower = {v.lower(): k for k, v in vis_type_mapping.items()} + + if vt_lower in vis_map_lower: + resolved = vis_map_lower.get(vt_lower) + elif vt_lower in flipped_lower: + resolved = 
flipped_lower.get(vt_lower) + else: + raise ValueError(f"{icons.red_dot} Unknown visual type: {visual_type}") + + return resolved + + page_type_mapping = { (320, 240): "Tooltip", (816, 1056): "Letter", @@ -70,22 +85,6 @@ page_types = ["Tooltip", "Letter", "4:3", "16:9"] -def get_web_url(report: str, workspace: Optional[str | UUID] = None): - - (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace) - - dfR = fabric.list_reports(workspace=workspace_id) - dfR_filt = dfR[dfR["Name"] == report] - - if len(dfR_filt) == 0: - raise ValueError( - f"{icons.red_dot} The '{report}' report does not exist within the '{workspace_name}' workspace." - ) - web_url = dfR_filt["Web Url"].iloc[0] - - return web_url - - def populate_custom_visual_display_names(): url = "https://catalogapi.azure.com/offers?api-version=2018-08-01-beta&storefront=appsource&$filter=offerType+eq+%27PowerBIVisuals%27" @@ -128,106 +127,6 @@ def fetch_all_pages(start_url): vis_type_mapping[vizId] = displayName -def resolve_page_name(self, page_name: str) -> Tuple[str, str, str]: - - dfP = self.list_pages() - if any(r["Page Name"] == page_name for _, r in dfP.iterrows()): - valid_page_name = page_name - dfP_filt = dfP[dfP["Page Name"] == page_name] - valid_display_name = dfP_filt["Page Display Name"].iloc[0] - file_path = dfP_filt["File Path"].iloc[0] - elif any(r["Page Display Name"] == page_name for _, r in dfP.iterrows()): - valid_display_name = page_name - dfP_filt = dfP[dfP["Page Display Name"] == page_name] - valid_page_name = dfP_filt["Page Name"].iloc[0] - file_path = dfP_filt["File Path"].iloc[0] - else: - raise ValueError( - f"{icons.red_dot} Invalid page name. The '{page_name}' page does not exist in the '{self._report}' report within the '{self._workspace}' workspace." - ) - - return valid_page_name, valid_display_name, file_path - - -def visual_page_mapping(self) -> Tuple[dict, dict]: - - page_mapping = {} - visual_mapping = {} - rd = self.rdef - for _, r in rd.iterrows(): - file_path = r["path"] - payload = r["payload"] - if file_path.endswith("/page.json"): - pattern_page = r"/pages/(.*?)/page.json" - page_name = re.search(pattern_page, file_path).group(1) - obj_file = base64.b64decode(payload).decode("utf-8") - obj_json = json.loads(obj_file) - page_id = obj_json.get("name") - page_display = obj_json.get("displayName") - page_mapping[page_name] = (page_id, page_display) - for _, r in rd.iterrows(): - file_path = r["path"] - payload = r["payload"] - if file_path.endswith("/visual.json"): - pattern_page = r"/pages/(.*?)/visuals/" - page_name = re.search(pattern_page, file_path).group(1) - visual_mapping[file_path] = ( - page_mapping.get(page_name)[0], - page_mapping.get(page_name)[1], - ) - - return page_mapping, visual_mapping - - -def resolve_visual_name( - self, page_name: str, visual_name: str -) -> Tuple[str, str, str, str]: - """ - Obtains the page name, page display name, and the file path for a given page in a report. - - Parameters - ---------- - page_name : str - The name of the page of the report - either the page name (GUID) or the page display name. - visual_name : str - The name of the visual of the report. - - Returns - ------- - Tuple[str, str, str, str] Page name, page display name, visual name, file path from the report definition. 
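The new resolve_visual_type helper in this internal module maps case-insensitively in both directions between internal visual type names and display names; a quick sketch using values from the mapping above:

from sempy_labs.report._report_helper import resolve_visual_type

resolve_visual_type("advancedSlicerVisual")  # -> "Button Slicer"
resolve_visual_type("button slicer")         # -> "advancedSlicerVisual"
resolve_visual_type("notARealVisual")        # raises ValueError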
- - """ - - dfV = self.list_visuals() - if any( - (r["Page Name"] == page_name) & (r["Visual Name"] == visual_name) - for _, r in dfV.iterrows() - ): - valid_page_name = page_name - dfV_filt = dfV[ - (dfV["Page Name"] == page_name) & (dfV["Visual Name"] == visual_name) - ] - file_path = dfV_filt["File Path"].iloc[0] - valid_display_name = dfV_filt["Page Display Name"].iloc[0] - elif any( - (r["Page Display Name"] == page_name) & (r["Visual Name"] == visual_name) - for _, r in dfV.iterrows() - ): - valid_display_name = page_name - dfV_filt = dfV[ - (dfV["Page Display Name"] == page_name) - & (dfV["Visual Name"] == visual_name) - ] - file_path = dfV_filt["File Path"].iloc[0] - valid_page_name = dfV_filt["Page Name"].iloc[0] - else: - raise ValueError( - f"{icons.red_dot} Invalid page/visual name. The '{visual_name}' visual on the '{page_name}' page does not exist in the '{self._report}' report within the '{self._workspace}' workspace." - ) - - return valid_page_name, valid_display_name, visual_name, file_path - - def find_entity_property_pairs(data, result=None, keys_path=None): if result is None: @@ -236,15 +135,27 @@ def find_entity_property_pairs(data, result=None, keys_path=None): keys_path = [] if isinstance(data, dict): + expression = data.get("Expression", {}) + source_ref = ( + expression.get("SourceRef", {}) if isinstance(expression, dict) else {} + ) + if ( - "Entity" in data.get("Expression", {}).get("SourceRef", {}) + isinstance(source_ref, dict) + and "Entity" in source_ref and "Property" in data ): - entity = data.get("Expression", {}).get("SourceRef", {}).get("Entity", {}) - property_value = data.get("Property") - object_type = keys_path[-1].replace("HierarchyLevel", "Hierarchy") + entity = source_ref.get("Entity", "") + property_value = data.get("Property", "") + + object_type = ( + keys_path[-1].replace("HierarchyLevel", "Hierarchy") + if keys_path + else "Unknown" + ) result[property_value] = (entity, object_type) - keys_path.pop() + if keys_path: + keys_path.pop() # Recursively search the rest of the dictionary for key, value in data.items(): diff --git a/src/sempy_labs/report/_report_rebind.py b/src/sempy_labs/report/_report_rebind.py index 0bae8866..82d33ddc 100644 --- a/src/sempy_labs/report/_report_rebind.py +++ b/src/sempy_labs/report/_report_rebind.py @@ -1,9 +1,9 @@ -import sempy.fabric as fabric from sempy_labs._helper_functions import ( resolve_dataset_id, resolve_workspace_name_and_id, resolve_report_id, _base_api, + resolve_dataset_name_and_id, ) from typing import Optional, List from sempy._utils._log import log @@ -104,10 +104,12 @@ def report_rebind_all( f"{icons.red_dot} The 'dataset' and 'new_dataset' parameters are both set to '{dataset}'. These parameters must be set to different values." ) - dataset_workspace = fabric.resolve_workspace_name(dataset_workspace) - - if new_dataset_workpace is None: - new_dataset_workpace = dataset_workspace + (dataset_name, dataset_id) = resolve_dataset_name_and_id( + dataset=dataset, workspace=dataset_workspace + ) + (dataset_workspace_name, dataset_workspace_id) = resolve_workspace_name_and_id( + workspace=dataset_workspace + ) if isinstance(report_workspace, str): report_workspace = [report_workspace] @@ -118,7 +120,7 @@ def report_rebind_all( if len(dfR) == 0: print( - f"{icons.info} The '{dataset}' semantic model within the '{dataset_workspace}' workspace has no dependent reports." + f"{icons.info} The '{dataset_name}' semantic model within the '{dataset_workspace_name}' workspace has no dependent reports." 
) return diff --git a/src/sempy_labs/report/_reportwrapper.py b/src/sempy_labs/report/_reportwrapper.py index 8888564f..67af8ea3 100644 --- a/src/sempy_labs/report/_reportwrapper.py +++ b/src/sempy_labs/report/_reportwrapper.py @@ -1,28 +1,40 @@ +from typing import Optional, Tuple, List, Literal +from contextlib import contextmanager +from sempy._utils._log import log +from uuid import UUID from sempy_labs._helper_functions import ( - resolve_report_id, - format_dax_object_name, - resolve_dataset_from_report, - _conv_b64, - _extract_json, - _add_part, - _decode_b64, resolve_workspace_name_and_id, - _update_dataframe_datatypes, + resolve_item_name_and_id, _base_api, _create_dataframe, + _update_dataframe_datatypes, + format_dax_object_name, + resolve_dataset_from_report, + generate_number_guid, + decode_payload, + is_base64, + generate_hex, + get_jsonpath_value, + set_json_value, + remove_json_value, +) +from sempy_labs._dictionary_diffs import ( + diff_parts, ) -from typing import Optional, List -import pandas as pd import json -import base64 -from uuid import UUID -from sempy._utils._log import log import sempy_labs._icons as icons +import copy +import pandas as pd +from jsonpath_ng.ext import parse import sempy_labs.report._report_helper as helper from sempy_labs._model_dependencies import get_measure_dependencies -from jsonpath_ng.ext import parse -import warnings import requests +import re +import base64 +from pathlib import Path +from urllib.parse import urlparse +import os +import fnmatch class ReportWrapper: @@ -33,128 +45,459 @@ class ReportWrapper: Parameters ---------- - report : str - The name of the report. + report : str | uuid.UUID + The name or ID of the report. workspace : str | uuid.UUID The name or ID of the workspace in which the report resides. Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. + readonly: bool, default=True + Whether the connection is read-only or read/write. Setting this to False enables read/write which saves the changes made back to the server. + show_diffs: bool, default=True + Whether to show the differences between the current report definition in the service and the new report definition. Returns ------- - pandas.DataFrame - A pandas dataframe containing the report metadata definition files. + None + A connection to the report is established and the report definition is retrieved. """ - _report: str - _workspace: str + _report_name: str + _report_id: str + _workspace_name: str + _workspace_id: str + _readonly: bool + _report_file_path = "definition/report.json" + _pages_file_path = "definition/pages/pages.json" + _report_extensions_path = "definition/reportExtensions.json" + + # Visuals + _title_path = ( + "$.visual.visualContainerObjects.title[*].properties.text.expr.Literal.Value" + ) + _subtitle_path = ( + "$.visual.visualContainerObjects.subTitle[*].properties.text.expr.Literal.Value" + ) + _visual_x_path = "$.position.x" + _visual_y_path = "$.position.y" @log def __init__( self, - report: str, + report: str | UUID, workspace: Optional[str | UUID] = None, + readonly: bool = True, + show_diffs: bool = True, ): - """ - Connects to a Power BI report and retrieves its definition. 
+ (self._workspace_name, self._workspace_id) = resolve_workspace_name_and_id( + workspace + ) + (self._report_name, self._report_id) = resolve_item_name_and_id( + item=report, type="Report", workspace=self._workspace_id + ) + self._readonly = readonly + self._show_diffs = show_diffs + + result = _base_api( + request=f"/v1/workspaces/{self._workspace_id}/items/{self._report_id}/getDefinition", + method="post", + status_codes=None, + lro_return_json=True, + ) + + # def is_zip_file(data: bytes) -> bool: + # return data.startswith(b"PK\x03\x04") + + # Check that the report is in the PBIR format + parts = result.get("definition", {}).get("parts", []) + if self._report_file_path not in [p.get("path") for p in parts]: + self.format = "PBIR-Legacy" + else: + self.format = "PBIR" + self._report_definition = {"parts": []} + for part in parts: + path = part.get("path") + payload = part.get("payload") + + # decoded_bytes = base64.b64decode(payload) + # decoded_payload = json.loads(_decode_b64(payload)) + # try: + # decoded_payload = json.loads(base64.b64decode(payload).decode("utf-8")) + # except Exception: + # decoded_payload = base64.b64decode(payload) + decoded_payload = decode_payload(payload) + + # if is_zip_file(decoded_bytes): + # merged_payload = {} + # with zipfile.ZipFile(BytesIO(decoded_bytes)) as zip_file: + # for filename in zip_file.namelist(): + # if filename.endswith(".json"): + # with zip_file.open(filename) as f: + # content = f.read() + # part_data = json.loads(content.decode("utf-8")) + + # if isinstance(part_data, dict): + # merged_payload.update(part_data) + # else: + # # For non-dict top-level json (rare), store under filename + # merged_payload[filename] = part_data + + # self._report_definition["parts"].append( + # {"path": path, "payload": merged_payload} + # ) + # else: + # decoded_payload = json.loads(decoded_bytes.decode("utf-8")) + self._report_definition["parts"].append( + {"path": path, "payload": decoded_payload} + ) + + self._current_report_definition = copy.deepcopy(self._report_definition) + + # self.report = self.Report(self) - The ReportWrapper and all functions which depend on it require the report to be in the `PBIR `_ format. + helper.populate_custom_visual_display_names() + + def _ensure_pbir(self): + + if self.format != "PBIR": + raise NotImplementedError( + f"{icons.red_dot} This ReportWrapper function requires the report to be in the PBIR format." + "See here for details: https://powerbi.microsoft.com/blog/power-bi-enhanced-report-format-pbir-in-power-bi-desktop-developer-mode-preview/" + ) + + # Basic functions + def get( + self, + file_path: str, + json_path: Optional[str] = None, + ) -> dict | List[Tuple[str, dict]]: + """ + Get the json content of the specified report definition file. Parameters ---------- - report : str - The name of the report. - workspace : str | UUID - The name or ID of the workspace in which the report resides. - Defaults to None which resolves to the workspace of the attached lakehouse - or if no lakehouse attached, resolves to the workspace of the notebook. + file_path : str + The path of the report definition file. For example: "definition/pages/pages.json". You may also use wildcards. For example: "definition/pages/*/page.json". + json_path : str, default=None + The json path to the specific part of the file to be retrieved. If None, the entire file content is returned. Returns ------- - pandas.DataFrame - A pandas dataframe containing the report metadata definition files. 
+ dict | List[Tuple[str, dict]] + The json content of the specified report definition file. """ - from sempy_labs.report import get_report_definition + parts = self._report_definition.get("parts") - warnings.simplefilter(action="ignore", category=FutureWarning) + # Find matching parts + if "*" in file_path: + matching_parts = [ + (part.get("path"), part.get("payload")) + for part in parts + if fnmatch.fnmatch(part.get("path"), file_path) + ] - self._report = report - (self._workspace_name, self._workspace_id) = resolve_workspace_name_and_id( - workspace - ) - self._report_id = resolve_report_id(report, self._workspace_id) - self.rdef = get_report_definition( - report=self._report, workspace=self._workspace_id + if not matching_parts: + raise ValueError( + f"{icons.red_dot} No files match the wildcard path '{file_path}'." + ) + + results = [] + for path, payload in matching_parts: + if not json_path: + results.append((path, payload)) + elif not isinstance(payload, dict): + raise ValueError( + f"{icons.red_dot} The payload of the file '{path}' is not a dictionary." + ) + else: + jsonpath_expr = parse(json_path) + matches = jsonpath_expr.find(payload) + if matches: + results.append((path, matches[0].value)) + # else: + # raise ValueError( + # f"{icons.red_dot} No match found for '{json_path}' in '{path}'." + # ) + if not results: + raise ValueError( + f"{icons.red_dot} No match found for '{json_path}' in any of the files matching the wildcard path '{file_path}'." + ) + return results + + # Exact path match + for part in parts: + if part.get("path") == file_path: + payload = part.get("payload") + if not json_path: + return payload + elif not isinstance(payload, dict): + raise ValueError( + f"{icons.red_dot} The payload of the file '{file_path}' is not a dictionary." + ) + else: + jsonpath_expr = parse(json_path) + matches = jsonpath_expr.find(payload) + if matches: + return matches[0].value + else: + raise ValueError( + f"{icons.red_dot} No match found for '{json_path}'." + ) + + raise ValueError( + f"{icons.red_dot} File '{file_path}' not found in report definition." ) - if len(self.rdef[self.rdef["path"] == "definition/report.json"]) == 0: + def add(self, file_path: str, payload: dict | bytes): + """ + Add a new file to the report definition. + + Parameters + ---------- + file_path : str + The path of the file to be added. For example: "definition/pages/pages.json". + payload : dict | bytes + The json content of the file to be added. This can be a dictionary or a base64 encoded string. + """ + + decoded_payload = decode_payload(payload) + + if file_path in self.list_paths().get("Path").values: raise ValueError( - f"{icons.red_dot} The ReportWrapper function requires the report to be in the PBIR format." - "See here for details: https://powerbi.microsoft.com/blog/power-bi-enhanced-report-format-pbir-in-power-bi-desktop-developer-mode-preview/" + f"{icons.red_dot} Cannot add the '{file_path}' file as this file path already exists in the report definition." ) - # Helper functions - def _add_extended(self, dataframe): + self._report_definition["parts"].append( + {"path": file_path, "payload": decoded_payload} + ) - from sempy_labs.tom import connect_semantic_model + def remove(self, file_path: str, json_path: Optional[str] = None, verbose=True): + """ + Removes a file from the report definition. 
- dataset_id, dataset_name, dataset_workspace_id, dataset_workspace_name = ( - resolve_dataset_from_report( - report=self._report, workspace=self._workspace_id + Parameters + ---------- + file_path : str + The path of the file to be removed. For example: "definition/pages/fjdis323484/page.json". + json_path : str, default=None + The json path to the specific part of the file to be removed. If None, the entire file is removed. Wildcards are supported (i.e. "definition/pages/*/page.json"). + verbose : bool, default=True + If True, prints messages about the removal process. If False, suppresses these messages. + """ + + parts = self._report_definition.get("parts") + matching_parts = [] + + if "*" in file_path: + matching_parts = [ + part for part in parts if fnmatch.fnmatch(part.get("path"), file_path) + ] + else: + matching_parts = [part for part in parts if part.get("path") == file_path] + + if not matching_parts: + raise ValueError( + f"{icons.red_dot} No file(s) found for path '{file_path}'." ) + + for part in matching_parts: + path = part.get("path") + payload = part.get("payload") + + if not json_path: + self._report_definition["parts"].remove(part) + print( + f"{icons.green_dot} The file '{path}' has been removed from the report definition." + ) + else: + remove_json_value( + path=path, payload=payload, json_path=json_path, verbose=verbose + ) + + def update(self, file_path: str, payload: dict | bytes): + """ + Updates the payload of a file in the report definition. + + Parameters + ---------- + file_path : str + The path of the file to be updated. For example: "definition/pages/pages.json". + payload : dict | bytes + The new json content of the file to be updated. This can be a dictionary or a base64 encoded string. + """ + + decoded_payload = decode_payload(payload) + + for part in self._report_definition.get("parts"): + if part.get("path") == file_path: + part["payload"] = decoded_payload + # if not self._readonly: + # print( + # f"The file '{file_path}' has been updated in the report definition." + # ) + return + + raise ValueError( + f"The '{file_path}' file was not found in the report definition." ) - with connect_semantic_model( - dataset=dataset_id, readonly=True, workspace=dataset_workspace_id - ) as tom: - for index, row in dataframe.iterrows(): - obj_type = row["Object Type"] - if obj_type == "Measure": - dataframe.at[index, "Valid Semantic Model Object"] = any( - o.Name == row["Object Name"] for o in tom.all_measures() - ) - elif obj_type == "Column": - dataframe.at[index, "Valid Semantic Model Object"] = any( - format_dax_object_name(c.Parent.Name, c.Name) - == format_dax_object_name(row["Table Name"], row["Object Name"]) - for c in tom.all_columns() - ) - elif obj_type == "Hierarchy": - dataframe.at[index, "Valid Semantic Model Object"] = any( - format_dax_object_name(h.Parent.Name, h.Name) - == format_dax_object_name(row["Table Name"], row["Object Name"]) - for h in tom.all_hierarchies() - ) - return dataframe + def set_json(self, file_path: str, json_path: str, json_value: str | dict | List): + """ + Sets the JSON value of a file in the report definition. If the json_path does not exist, it will be created. + + Parameters + ---------- + file_path : str + The file path of the JSON file to be updated. For example: "definition/pages/ReportSection1/visuals/a1d8f99b81dcc2d59035/visual.json". Also supports wildcards. + json_path : str + The JSON path to the value to be updated or created. This must be a valid JSONPath expression. 
+ Examples: + "$.objects.outspace" + "$.hi.def[*].vv" + json_value : str | dict | List + The new value to be set at the specified JSON path. This can be a string, dictionary, or list. + """ + + files = self.get(file_path=file_path) + + if isinstance(files, dict): + files = [(file_path, files)] + + for file in files: + path = file[0] + payload = file[1] + new_payload = set_json_value( + payload=payload, json_path=json_path, json_value=json_value + ) - def _update_single_file(self, file_name: str, new_payload): + self.update(file_path=path, payload=new_payload) + + def list_paths(self) -> pd.DataFrame: """ - Updates a single file within the PBIR structure + List all file paths in the report definition. + + Returns + ------- + pandas.DataFrame + A pandas dataframe containing a list of all paths in the report definition. """ - request_body = {"definition": {"parts": []}} - for _, r in self.rdef.iterrows(): - path = r["path"] - payload = r["payload"] - if path == file_name: - _add_part(request_body, path=path, payload=new_payload) - else: - _add_part(request_body, path=path, payload=payload) + existing_paths = [ + part.get("path") for part in self._report_definition.get("parts") + ] + return pd.DataFrame(existing_paths, columns=["Path"]) - self.update_report(request_body) + def __all_pages(self): - def update_report(self, request_body: dict): + self._ensure_pbir() - _base_api( - request=f"/v1/workspaces/{self._workspace_id}/reports/{self._report_id}/updateDefinition", - method="post", - payload=request_body, - lro_return_status_code=True, - status_codes=None, + return [ + o + for o in self._report_definition.get("parts") + if o.get("path").endswith("/page.json") + ] + + def __all_visuals(self): + + self._ensure_pbir() + + return [ + o + for o in self._report_definition.get("parts") + if o.get("path").endswith("/visual.json") + ] + + # Helper functions + def __resolve_page_list(self, page: Optional[str | List[str]] = None) -> List[str]: + + if isinstance(page, str): + page = [page] + + # Resolve page list + return ( + [self.resolve_page_name(p) for p in page] + if page + else [ + p["payload"]["name"] + for p in self.__all_pages() + if "payload" in p and "name" in p["payload"] + ] + ) + + def _get_url(self, page_name: Optional[str] = None) -> str: + """ + Gets the URL of the report. If specified, gets the URL of the specified page. + + Parameters + ---------- + page_name : str, default=None + The name of the page. If None, gets the URL of the report. + If specified, gets the URL of the specified page. + + Returns + ------- + str + The URL of the report or the specified page. 
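# Illustrative sketch: the JSONPath-based write that set_json builds on. A parsed
# jsonpath_ng expression can update a value in place; the set_json_value helper used here
# (defined outside this hunk) is assumed to add create-if-missing behaviour on top of this.
from jsonpath_ng.ext import parse

pages_file = {"pageOrder": ["p1", "p2"], "activePageName": "p1"}
parse("$.activePageName").update(pages_file, "p2")
print(pages_file)  # {'pageOrder': ['p1', 'p2'], 'activePageName': 'p2'}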
+ """ + + url = f"https://app.powerbi.com/groups/{self._workspace_id}/reports/{self._report_id}" + + if page_name: + url += f"/{page_name}" + + return url + + def __resolve_page_name_and_display_name_file_path( + self, page: str + ) -> Tuple[str, str, str]: + + self._ensure_pbir() + page_map = { + p["path"]: [p["payload"]["name"], p["payload"]["displayName"]] + for p in self._report_definition.get("parts", []) + if p.get("path", "").endswith("/page.json") and "payload" in p + } + + # Build lookup: page_id → (path, display_name) + id_lookup = {v[0]: (k, v[1]) for k, v in page_map.items()} + + # Build lookup: display_name → (path, page_id) + name_lookup = {v[1]: (k, v[0]) for k, v in page_map.items()} + + if page in id_lookup: + path, display_name = id_lookup[page] + return path, page, display_name + elif page in name_lookup: + path, page_id = name_lookup[page] + return path, page_id, page + else: + raise ValueError( + f"{icons.red_dot} Invalid page display name. The '{page}' page does not exist in the '{self._report_name}' report within the '{self._workspace_name}' workspace." + ) + + def _resolve_page_name_and_display_name(self, page: str) -> Tuple[str, str]: + """ + Obtains the page name, page display name for a given page in a report. + + Parameters + ---------- + page : str + The page name or display name. + + Returns + ------- + Tuple[str, str] + The page name and display name. + """ + + (_, page_id, page_name) = self.__resolve_page_name_and_display_name_file_path( + page ) - def resolve_page_name(self, page_display_name: str) -> UUID: + return (page_id, page_name) + + def resolve_page_name(self, page_display_name: str) -> str: """ Obtains the page name, page display name, and the file path for a given page in a report. @@ -165,21 +508,22 @@ def resolve_page_name(self, page_display_name: str) -> UUID: Returns ------- - UUID + str The page name. """ - x, y, z = helper.resolve_page_name(self, page_display_name) - - return x + (path, page_id, page_name) = ( + self.__resolve_page_name_and_display_name_file_path(page_display_name) + ) + return page_id - def resolve_page_display_name(self, page_name: UUID) -> str: + def resolve_page_display_name(self, page_name: str) -> str: """ Obtains the page dispaly name. Parameters ---------- - page_name : UUID + page_name : str The name of the page of the report. Returns @@ -188,59 +532,120 @@ def resolve_page_display_name(self, page_name: UUID) -> str: The page display name. """ - x, y, z = helper.resolve_page_name(self, page_name=page_name) + (path, page_id, page_name) = ( + self.__resolve_page_name_and_display_name_file_path(page_name) + ) + return page_name - return y + def __add_to_registered_resources(self, name: str, path: str, type: str): - def get_theme(self, theme_type: str = "baseTheme") -> dict: - """ - Obtains the theme file of the report. + type = type.capitalize() - Parameters - ---------- - theme_type : str, default="baseTheme" - The theme type. Options: "baseTheme", "customTheme". 
+ report_file = self.get(file_path=self._report_file_path) + rp_names = [rp.get("name") for rp in report_file.get("resourcePackages")] - Returns - ------- - dict - The theme.json file - """ + new_item = {"name": name, "path": path, "type": type} + if "RegisteredResources" not in rp_names: + res = { + "name": "RegisteredResources", + "type": "RegisteredResources", + "items": [new_item], + } + report_file.get("resourcePackages").append(res) + else: + for rp in report_file.get("resourcePackages"): + if rp.get("name") == "RegisteredResources": + for item in rp.get("items"): + item_name = item.get("name") + item_type = item.get("type") + item_path = item.get("path") + if ( + item_name == name + and item_type == type + and item_path == path + ): + print( + f"{icons.info} The '{item_name}' {type.lower()} already exists in the report definition." + ) + raise ValueError() - theme_types = ["baseTheme", "customTheme"] - theme_type = theme_type.lower() + # Add the new item to the existing RegisteredResources + rp["items"].append(new_item) - if "custom" in theme_type: - theme_type = "customTheme" - elif "base" in theme_type: - theme_type = "baseTheme" - if theme_type not in theme_types: - raise ValueError( - f"{icons.red_dot} Invalid theme type. Valid options: {theme_types}." - ) + self.update(file_path=self._report_file_path, payload=report_file) - rptdef = self.rdef[self.rdef["path"] == "definition/report.json"] - rptJson = _extract_json(rptdef) - theme_collection = rptJson.get("themeCollection", {}) - if theme_type not in theme_collection: - raise ValueError( - f"{icons.red_dot} The {self._report} report within the '{self._workspace_name} workspace has no custom theme." - ) - ct = theme_collection.get(theme_type) - theme_name = ct["name"] - theme_location = ct["type"] - theme_file_path = f"StaticResources/{theme_location}/{theme_name}" - if theme_type == "baseTheme": - theme_file_path = ( - f"StaticResources/{theme_location}/BaseThemes/{theme_name}" + def _add_extended(self, dataframe): + + from sempy_labs.tom import connect_semantic_model + + dataset_id, dataset_name, dataset_workspace_id, dataset_workspace_name = ( + resolve_dataset_from_report( + report=self._report_id, workspace=self._workspace_id ) - if not theme_file_path.endswith(".json"): - theme_file_path = f"{theme_file_path}.json" + ) + + report_level_measures = list( + self.list_report_level_measures()["Measure Name"].values + ) + with connect_semantic_model( + dataset=dataset_id, readonly=True, workspace=dataset_workspace_id + ) as tom: + measure_names = {m.Name for m in tom.all_measures()} + measure_names.update(report_level_measures) + column_names = { + format_dax_object_name(c.Parent.Name, c.Name) for c in tom.all_columns() + } + hierarchy_names = { + format_dax_object_name(h.Parent.Name, h.Name) + for h in tom.all_hierarchies() + } + + # Vectorized checks + def is_valid(row): + obj_type = row["Object Type"] + obj_name = row["Object Name"] + if obj_type == "Measure": + return obj_name in measure_names + elif obj_type == "Column": + return ( + format_dax_object_name(row["Table Name"], obj_name) in column_names + ) + elif obj_type == "Hierarchy": + return ( + format_dax_object_name(row["Table Name"], obj_name) + in hierarchy_names + ) + return False + + dataframe["Valid Semantic Model Object"] = dataframe.apply(is_valid, axis=1) + return dataframe - theme_df = self.rdef[self.rdef["path"] == theme_file_path] - theme_json = _extract_json(theme_df) + def _visual_page_mapping(self) -> dict: + self._ensure_pbir() + + page_mapping = {} + 
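# Illustrative sketch: the validation strategy _add_extended now uses. Model object names are
# collected once into sets, then each report row is checked with a cheap membership lookup
# instead of re-scanning the model per row. The data is invented, and the qualified-name
# formatting of format_dax_object_name is hard-coded here as 'Table'[Object].
import pandas as pd

measure_names = {"Total Sales", "Margin %"}
column_names = {"'Date'[Year]", "'Product'[Category]"}

rows = pd.DataFrame(
    [
        {"Object Type": "Measure", "Table Name": "Sales", "Object Name": "Total Sales"},
        {"Object Type": "Column", "Table Name": "Date", "Object Name": "Month"},
    ]
)

def is_valid(row):
    if row["Object Type"] == "Measure":
        return row["Object Name"] in measure_names
    if row["Object Type"] == "Column":
        return f"'{row['Table Name']}'[{row['Object Name']}]" in column_names
    return False

rows["Valid Semantic Model Object"] = rows.apply(is_valid, axis=1)
print(rows)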
visual_mapping = {} + + for p in self.__all_pages(): + path = p.get("path") + payload = p.get("payload") + pattern_page = r"/pages/(.*?)/page.json" + page_name = re.search(pattern_page, path).group(1) + page_id = payload.get("name") + page_display = payload.get("displayName") + page_mapping[page_name] = (page_id, page_display) + + for v in self.__all_visuals(): + path = v.get("path") + payload = v.get("payload") + pattern_page = r"/pages/(.*?)/visuals/" + page_name = re.search(pattern_page, path).group(1) + visual_mapping[path] = ( + page_mapping.get(page_name)[0], + page_mapping.get(page_name)[1], + ) - return theme_json + return visual_mapping # List functions def list_custom_visuals(self) -> pd.DataFrame: @@ -252,8 +657,7 @@ def list_custom_visuals(self) -> pd.DataFrame: pandas.DataFrame A pandas dataframe containing a list of all the custom visuals used in the report. """ - - helper.populate_custom_visual_display_names() + self._ensure_pbir() columns = { "Custom Visual Name": "str", @@ -262,17 +666,27 @@ def list_custom_visuals(self) -> pd.DataFrame: } df = _create_dataframe(columns=columns) - rd = self.rdef - rd_filt = rd[rd["path"] == "definition/report.json"] - rptJson = _extract_json(rd_filt) - df["Custom Visual Name"] = rptJson.get("publicCustomVisuals") + + report_file = self.get(file_path=self._report_file_path) + + df["Custom Visual Name"] = report_file.get("publicCustomVisuals") df["Custom Visual Display Name"] = df["Custom Visual Name"].apply( lambda x: helper.vis_type_mapping.get(x, x) ) - df["Used in Report"] = df["Custom Visual Name"].isin( - self.list_visuals()["Type"] - ) + visual_types = set() + for v in self.__all_visuals(): + payload = v.get("payload", {}) + visual = payload.get("visual", {}) + visual_type = visual.get("visualType") + if visual_type: + visual_types.add(visual_type) + + for _, r in df.iterrows(): + if r["Custom Visual Name"] in visual_types: + df.at[_, "Used in Report"] = True + else: + df.at[_, "Used in Report"] = False _update_dataframe_datatypes(dataframe=df, column_map=columns) @@ -294,7 +708,9 @@ def list_report_filters(self, extended: bool = False) -> pd.DataFrame: A pandas dataframe containing a list of all the report filters used in the report. 
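# Illustrative sketch: the path parsing behind _visual_page_mapping. The page folder name is
# extracted from each part path with a regular expression, and each visual is keyed back to
# the page it lives under. The paths below are invented examples.
import re

page_paths = ["definition/pages/a1b2c3/page.json"]
visual_paths = ["definition/pages/a1b2c3/visuals/v9f8e7/visual.json"]

pages = {re.search(r"/pages/(.*?)/page\.json", p).group(1): p for p in page_paths}
for v in visual_paths:
    folder = re.search(r"/pages/(.*?)/visuals/", v).group(1)
    print(v, "->", pages[folder])
# definition/pages/a1b2c3/visuals/v9f8e7/visual.json -> definition/pages/a1b2c3/page.json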
""" - rd_filt = self.rdef[self.rdef["path"] == "definition/report.json"] + self._ensure_pbir() + + report_file = self.get(file_path=self._report_file_path) columns = { "Filter Name": "str", @@ -309,35 +725,36 @@ def list_report_filters(self, extended: bool = False) -> pd.DataFrame: } df = _create_dataframe(columns=columns) - if len(rd_filt) == 1: - rpt_json = _extract_json(rd_filt) - if "filterConfig" in rpt_json: - for flt in rpt_json.get("filterConfig", {}).get("filters", {}): - filter_name = flt.get("name") - how_created = flt.get("howCreated") - locked = flt.get("isLockedInViewMode", False) - hidden = flt.get("isHiddenInViewMode", False) - filter_type = flt.get("type", "Basic") - filter_used = True if "Where" in flt.get("filter", {}) else False - - entity_property_pairs = helper.find_entity_property_pairs(flt) + dfs = [] - for object_name, properties in entity_property_pairs.items(): - new_data = { - "Filter Name": filter_name, - "Type": filter_type, - "Table Name": properties[0], - "Object Name": object_name, - "Object Type": properties[1], - "Hidden": hidden, - "Locked": locked, - "How Created": how_created, - "Used": filter_used, - } + if "filterConfig" in report_file: + for flt in report_file.get("filterConfig", {}).get("filters", {}): + filter_name = flt.get("name") + how_created = flt.get("howCreated") + locked = flt.get("isLockedInViewMode", False) + hidden = flt.get("isHiddenInViewMode", False) + filter_type = flt.get("type", "Basic") + filter_used = True if "Where" in flt.get("filter", {}) else False - df = pd.concat( - [df, pd.DataFrame(new_data, index=[0])], ignore_index=True - ) + entity_property_pairs = helper.find_entity_property_pairs(flt) + + for object_name, properties in entity_property_pairs.items(): + new_data = { + "Filter Name": filter_name, + "Type": filter_type, + "Table Name": properties[0], + "Object Name": object_name, + "Object Type": properties[1], + "Hidden": hidden, + "Locked": locked, + "How Created": how_created, + "Used": filter_used, + } + + dfs.append(pd.DataFrame(new_data, index=[0])) + + if dfs: + df = pd.concat(dfs, ignore_index=True) _update_dataframe_datatypes(dataframe=df, column_map=columns) @@ -361,6 +778,7 @@ def list_page_filters(self, extended: bool = False) -> pd.DataFrame: pandas.DataFrame A pandas dataframe containing a list of all the page filters used in the report. 
""" + self._ensure_pbir() columns = { "Page Name": "str", @@ -377,51 +795,43 @@ def list_page_filters(self, extended: bool = False) -> pd.DataFrame: } df = _create_dataframe(columns=columns) - for _, r in self.rdef.iterrows(): - path = r["path"] - payload = r["payload"] - if path.endswith("/page.json"): - obj_file = base64.b64decode(payload).decode("utf-8") - obj_json = json.loads(obj_file) - page_id = obj_json.get("name") - page_display = obj_json.get("displayName") - - if "filterConfig" in obj_json: - for flt in obj_json.get("filterConfig", {}).get("filters", {}): - filter_name = flt.get("name") - how_created = flt.get("howCreated") - locked = flt.get("isLockedInViewMode", False) - hidden = flt.get("isHiddenInViewMode", False) - filter_type = flt.get("type", "Basic") - filter_used = ( - True if "Where" in flt.get("filter", {}) else False - ) + dfs = [] + for p in self.__all_pages(): + payload = p.get("payload") + page_id = payload.get("name") + page_display = payload.get("displayName") - entity_property_pairs = helper.find_entity_property_pairs(flt) - - for object_name, properties in entity_property_pairs.items(): - new_data = { - "Page Name": page_id, - "Page Display Name": page_display, - "Filter Name": filter_name, - "Type": filter_type, - "Table Name": properties[0], - "Object Name": object_name, - "Object Type": properties[1], - "Hidden": hidden, - "Locked": locked, - "How Created": how_created, - "Used": filter_used, - } - - df = pd.concat( - [df, pd.DataFrame(new_data, index=[0])], - ignore_index=True, - ) + if "filterConfig" in payload: + for flt in payload.get("filterConfig", {}).get("filters", {}): + filter_name = flt.get("name") + how_created = flt.get("howCreated") + locked = flt.get("isLockedInViewMode", False) + hidden = flt.get("isHiddenInViewMode", False) + filter_type = flt.get("type", "Basic") + filter_used = True if "Where" in flt.get("filter", {}) else False - df["Page URL"] = df["Page Name"].apply( - lambda page_name: f"{helper.get_web_url(report=self._report, workspace=self._workspace_id)}/{page_name}" - ) + entity_property_pairs = helper.find_entity_property_pairs(flt) + + for object_name, properties in entity_property_pairs.items(): + new_data = { + "Page Name": page_id, + "Page Display Name": page_display, + "Filter Name": filter_name, + "Type": filter_type, + "Table Name": properties[0], + "Object Name": object_name, + "Object Type": properties[1], + "Hidden": hidden, + "Locked": locked, + "How Created": how_created, + "Used": filter_used, + "Page URL": self._get_url(page_name=page_id), + } + + dfs.append(pd.DataFrame(new_data, index=[0])) + + if dfs: + df = pd.concat(dfs, ignore_index=True) _update_dataframe_datatypes(dataframe=df, column_map=columns) @@ -445,6 +855,7 @@ def list_visual_filters(self, extended: bool = False) -> pd.DataFrame: pandas.DataFrame A pandas dataframe containing a list of all the visual filters used in the report. 
""" + self._ensure_pbir() columns = { "Page Name": "str", @@ -462,51 +873,47 @@ def list_visual_filters(self, extended: bool = False) -> pd.DataFrame: } df = _create_dataframe(columns=columns) - page_mapping, visual_mapping = helper.visual_page_mapping(self) - - for _, r in self.rdef.iterrows(): - path = r["path"] - payload = r["payload"] - if path.endswith("/visual.json"): - obj_file = base64.b64decode(payload).decode("utf-8") - obj_json = json.loads(obj_file) - page_id = visual_mapping.get(path)[0] - page_display = visual_mapping.get(path)[1] - visual_name = obj_json.get("name") - - if "filterConfig" in obj_json: - for flt in obj_json.get("filterConfig", {}).get("filters", {}): - filter_name = flt.get("name") - how_created = flt.get("howCreated") - locked = flt.get("isLockedInViewMode", False) - hidden = flt.get("isHiddenInViewMode", False) - filter_type = flt.get("type", "Basic") - filter_used = ( - True if "Where" in flt.get("filter", {}) else False - ) + visual_mapping = self._visual_page_mapping() - entity_property_pairs = helper.find_entity_property_pairs(flt) - - for object_name, properties in entity_property_pairs.items(): - new_data = { - "Page Name": page_id, - "Page Display Name": page_display, - "Visual Name": visual_name, - "Filter Name": filter_name, - "Type": filter_type, - "Table Name": properties[0], - "Object Name": object_name, - "Object Type": properties[1], - "Hidden": hidden, - "Locked": locked, - "How Created": how_created, - "Used": filter_used, - } - - df = pd.concat( - [df, pd.DataFrame(new_data, index=[0])], - ignore_index=True, - ) + dfs = [] + for v in self.__all_visuals(): + path = v.get("path") + payload = v.get("payload") + page_id = visual_mapping.get(path)[0] + page_display = visual_mapping.get(path)[1] + visual_name = payload.get("name") + + if "filterConfig" in payload: + for flt in payload.get("filterConfig", {}).get("filters", {}): + filter_name = flt.get("name") + how_created = flt.get("howCreated") + locked = flt.get("isLockedInViewMode", False) + hidden = flt.get("isHiddenInViewMode", False) + filter_type = flt.get("type", "Basic") + filter_used = True if "Where" in flt.get("filter", {}) else False + + entity_property_pairs = helper.find_entity_property_pairs(flt) + + for object_name, properties in entity_property_pairs.items(): + new_data = { + "Page Name": page_id, + "Page Display Name": page_display, + "Visual Name": visual_name, + "Filter Name": filter_name, + "Type": filter_type, + "Table Name": properties[0], + "Object Name": object_name, + "Object Type": properties[1], + "Hidden": hidden, + "Locked": locked, + "How Created": how_created, + "Used": filter_used, + } + + dfs.append(pd.DataFrame(new_data, index=[0])) + + if dfs: + df = pd.concat(dfs, ignore_index=True) _update_dataframe_datatypes(dataframe=df, column_map=columns) @@ -527,6 +934,7 @@ def list_visual_interactions(self) -> pd.DataFrame: pandas.DataFrame A pandas dataframe containing a list of all modified visual interactions used in the report. 
""" + self._ensure_pbir() columns = { "Page Name": "str", @@ -537,30 +945,28 @@ def list_visual_interactions(self) -> pd.DataFrame: } df = _create_dataframe(columns=columns) - for _, r in self.rdef.iterrows(): - file_path = r["path"] - payload = r["payload"] - if file_path.endswith("/page.json"): - obj_file = base64.b64decode(payload).decode("utf-8") - obj_json = json.loads(obj_file) - page_name = obj_json.get("name") - page_display = obj_json.get("displayName") + dfs = [] + for p in self.__all_pages(): + payload = p.get("payload") + page_name = payload.get("name") + page_display = payload.get("displayName") - for vizInt in obj_json.get("visualInteractions", []): - sourceVisual = vizInt.get("source") - targetVisual = vizInt.get("target") - vizIntType = vizInt.get("type") + for vizInt in payload.get("visualInteractions", []): + sourceVisual = vizInt.get("source") + targetVisual = vizInt.get("target") + vizIntType = vizInt.get("type") - new_data = { - "Page Name": page_name, - "Page Display Name": page_display, - "Source Visual Name": sourceVisual, - "Target Visual Name": targetVisual, - "Type": vizIntType, - } - df = pd.concat( - [df, pd.DataFrame(new_data, index=[0])], ignore_index=True - ) + new_data = { + "Page Name": page_name, + "Page Display Name": page_display, + "Source Visual Name": sourceVisual, + "Target Visual Name": targetVisual, + "Type": vizIntType, + } + dfs.append(pd.DataFrame(new_data, index=[0])) + + if dfs: + df = pd.concat(dfs, ignore_index=True) return df @@ -573,6 +979,7 @@ def list_pages(self) -> pd.DataFrame: pandas.DataFrame A pandas dataframe containing a list of all pages in the report. """ + self._ensure_pbir() columns = { "File Path": "str", @@ -594,46 +1001,42 @@ def list_pages(self) -> pd.DataFrame: } df = _create_dataframe(columns=columns) - dfV = self.list_visuals() - - page_rows = self.rdef[self.rdef["path"].str.endswith("/page.json")] - pages_row = self.rdef[self.rdef["path"] == "definition/pages/pages.json"] + page = self.get(file_path=self._pages_file_path) + active_page = page.get("activePageName") - for _, r in page_rows.iterrows(): - file_path = r["path"] - payload = r["payload"] + dfV = self.list_visuals() - pageFile = base64.b64decode(payload).decode("utf-8") + dfs = [] + for p in self.__all_pages(): + file_path = p.get("path") page_prefix = file_path[0:-9] - pageJson = json.loads(pageFile) - page_name = pageJson.get("name") - height = pageJson.get("height") - width = pageJson.get("width") + payload = p.get("payload") + page_name = payload.get("name") + height = payload.get("height") + width = payload.get("width") # Alignment - matches = parse( - "$.objects.displayArea[0].properties.verticalAlignment.expr.Literal.Value" - ).find(pageJson) - alignment_value = ( - matches[0].value[1:-1] if matches and matches[0].value else "Top" + alignment_value = get_jsonpath_value( + data=payload, + path="$.objects.displayArea[*].properties.verticalAlignment.expr.Literal.Value", + default="Top", + remove_quotes=True, ) # Drillthrough - matches = parse("$.filterConfig.filters[*].howCreated").find(pageJson) + matches = parse("$.filterConfig.filters[*].howCreated").find(payload) how_created_values = [match.value for match in matches] drill_through = any(value == "Drillthrough" for value in how_created_values) - # matches = parse("$.filterConfig.filters[*]").find(pageJson) - # drill_through = any( - # filt.get("howCreated") == "Drillthrough" - # for filt in (match.value for match in matches) - # ) visual_count = len( - self.rdef[ - 
self.rdef["path"].str.endswith("/visual.json") - & (self.rdef["path"].str.startswith(page_prefix)) + [ + v + for v in self._report_definition.get("parts") + if v.get("path").endswith("/visual.json") + and v.get("path").startswith(page_prefix) ] ) + data_visual_count = len( dfV[(dfV["Page Name"] == page_name) & (dfV["Data Visual"])] ) @@ -642,24 +1045,25 @@ def list_pages(self) -> pd.DataFrame: ) # Page Filter Count - matches = parse("$.filterConfig.filters").find(pageJson) - page_filter_count = ( - len(matches[0].value) if matches and matches[0].value else 0 + page_filter_count = len( + get_jsonpath_value( + data=payload, path="$.filterConfig.filters", default=[] + ) ) # Hidden - matches = parse("$.visibility").find(pageJson) + matches = parse("$.visibility").find(payload) is_hidden = any(match.value == "HiddenInViewMode" for match in matches) new_data = { "File Path": file_path, "Page Name": page_name, - "Page Display Name": pageJson.get("displayName"), - "Display Option": pageJson.get("displayOption"), + "Page Display Name": payload.get("displayName"), + "Display Option": payload.get("displayOption"), "Height": height, "Width": width, "Hidden": is_hidden, - "Active": False, + "Active": True if page_name == active_page else False, "Type": helper.page_type_mapping.get((width, height), "Custom"), "Alignment": alignment_value, "Drillthrough Target Page": drill_through, @@ -667,24 +1071,16 @@ def list_pages(self) -> pd.DataFrame: "Data Visual Count": data_visual_count, "Visible Visual Count": visible_visual_count, "Page Filter Count": page_filter_count, + "Page URL": self._get_url(page_name=page_name), } - df = pd.concat([df, pd.DataFrame(new_data, index=[0])], ignore_index=True) - - page_payload = pages_row["payload"].iloc[0] - pageFile = base64.b64decode(page_payload).decode("utf-8") - pageJson = json.loads(pageFile) - activePage = pageJson["activePageName"] + dfs.append(pd.DataFrame(new_data, index=[0])) - df.loc[df["Page Name"] == activePage, "Active"] = True - - df["Page URL"] = df["Page Name"].apply( - lambda page_name: f"{helper.get_web_url(report=self._report, workspace=self._workspace_id)}/{page_name}" - ) + if dfs: + df = pd.concat(dfs, ignore_index=True) _update_dataframe_datatypes(dataframe=df, column_map=columns) return df - # return df.style.format({"Page URL": _make_clickable}) def list_visuals(self) -> pd.DataFrame: """ @@ -695,6 +1091,7 @@ def list_visuals(self) -> pd.DataFrame: pandas.DataFrame A pandas dataframe containing a list of all visuals in the report. 
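# Illustrative sketch: PBIR stores literal values as quoted strings (e.g. "'Middle'"), and the
# get_jsonpath_value helper used in list_pages (defined outside this hunk) is assumed to strip
# those quotes after the JSONPath lookup. Stand-alone equivalent with an invented page payload:
from jsonpath_ng.ext import parse

page = {
    "objects": {
        "displayArea": [
            {"properties": {"verticalAlignment": {"expr": {"Literal": {"Value": "'Middle'"}}}}}
        ]
    }
}
matches = parse(
    "$.objects.displayArea[*].properties.verticalAlignment.expr.Literal.Value"
).find(page)
alignment = matches[0].value.strip("'") if matches else "Top"
print(alignment)  # Middle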
""" + self._ensure_pbir() columns = { "File Path": "str", @@ -726,12 +1123,9 @@ def list_visuals(self) -> pd.DataFrame: } df = _create_dataframe(columns=columns) - rd_filt = self.rdef[self.rdef["path"] == "definition/report.json"] - payload = rd_filt["payload"].iloc[0] - rptJson = _extract_json(rd_filt) - custom_visuals = rptJson.get("publicCustomVisuals", []) - page_mapping, visual_mapping = helper.visual_page_mapping(self) - helper.populate_custom_visual_display_names() + report_file = self.get(file_path=self._report_file_path) + custom_visuals = report_file.get("publicCustomVisuals", []) + visual_mapping = self._visual_page_mapping() agg_type_map = helper._get_agg_type_mapping() def contains_key(data, keys_to_check): @@ -748,150 +1142,144 @@ def contains_key(data, keys_to_check): return any(key in all_keys for key in keys_to_check) - for _, r in self.rdef.iterrows(): - file_path = r["path"] - payload = r["payload"] - if file_path.endswith("/visual.json"): - visual_file = base64.b64decode(payload).decode("utf-8") - visual_json = json.loads(visual_file) - page_id = visual_mapping.get(file_path)[0] - page_display = visual_mapping.get(file_path)[1] - pos = visual_json.get("position") - - # Visual Type - matches = parse("$.visual.visualType").find(visual_json) - visual_type = matches[0].value if matches else "Group" - - visual_type_display = helper.vis_type_mapping.get( - visual_type, visual_type - ) - cst_value, rst_value, slicer_type = False, False, "N/A" + dfs = [] - # Visual Filter Count - matches = parse("$.filterConfig.filters[*]").find(visual_json) - visual_filter_count = len(matches) + for v in self.__all_visuals(): + path = v.get("path") + payload = v.get("payload") + page_id = visual_mapping.get(path)[0] + page_display = visual_mapping.get(path)[1] + pos = payload.get("position") - # Data Limit - matches = parse( - '$.filterConfig.filters[?(@.type == "VisualTopN")].filter.Where[*].Condition.VisualTopN.ItemCount' - ).find(visual_json) - data_limit = matches[0].value if matches else 0 + # Visual Type + matches = parse("$.visual.visualType").find(payload) + visual_type = matches[0].value if matches else "Group" - # Title - matches = parse( - "$.visual.visualContainerObjects.title[0].properties.text.expr" - ).find(visual_json) - # title = matches[0].value[1:-1] if matches else "" - title = ( - helper._get_expression(matches[0].value, agg_type_map) - if matches - else "" - ) + visual_type_display = helper.vis_type_mapping.get(visual_type, visual_type) + cst_value, rst_value, slicer_type = False, False, "N/A" - # SubTitle - matches = parse( - "$.visual.visualContainerObjects.subTitle[0].properties.text.expr" - ).find(visual_json) - # sub_title = matches[0].value[1:-1] if matches else "" - sub_title = ( - helper._get_expression(matches[0].value, agg_type_map) - if matches - else "" - ) + # Visual Filter Count + matches = parse("$.filterConfig.filters[*]").find(payload) + visual_filter_count = len(matches) - # Alt Text - matches = parse( - "$.visual.visualContainerObjects.general[0].properties.altText.expr" - ).find(visual_json) - # alt_text = matches[0].value[1:-1] if matches else "" - alt_text = ( - helper._get_expression(matches[0].value, agg_type_map) - if matches - else "" - ) + # Data Limit + matches = parse( + '$.filterConfig.filters[?(@.type == "VisualTopN")].filter.Where[*].Condition.VisualTopN.ItemCount' + ).find(payload) + data_limit = matches[0].value if matches else 0 + + # Title + matches = parse( + "$.visual.visualContainerObjects.title[0].properties.text.expr" + 
).find(payload) + title = ( + helper._get_expression(matches[0].value, agg_type_map) + if matches + else "" + ) + + # SubTitle + matches = parse( + "$.visual.visualContainerObjects.subTitle[0].properties.text.expr" + ).find(payload) + sub_title = ( + helper._get_expression(matches[0].value, agg_type_map) + if matches + else "" + ) - # Show items with no data - def find_show_all_with_jsonpath(obj): - matches = parse("$..showAll").find(obj) - return any(match.value is True for match in matches) + # Alt Text + matches = parse( + "$.visual.visualContainerObjects.general[0].properties.altText.expr" + ).find(payload) + alt_text = ( + helper._get_expression(matches[0].value, agg_type_map) + if matches + else "" + ) - show_all_data = find_show_all_with_jsonpath(visual_json) + # Show items with no data + def find_show_all_with_jsonpath(obj): + matches = parse("$..showAll").find(obj) + return any(match.value is True for match in matches) - # Divider + show_all_data = find_show_all_with_jsonpath(payload) + + # Divider + matches = parse( + "$.visual.visualContainerObjects.divider[0].properties.show.expr.Literal.Value" + ).find(payload) + divider = matches[0] if matches else "" + + # Row/Column Subtotals + if visual_type == "pivotTable": + cst_matches = parse( + "$.visual.objects.subTotals[0].properties.columnSubtotals.expr.Literal.Value" + ).find(payload) + rst_matches = parse( + "$.visual.objects.subTotals[0].properties.rowSubtotals.expr.Literal.Value" + ).find(payload) + + if cst_matches: + cst_value = False if cst_matches[0].value == "false" else True + + if rst_matches: + rst_value = False if rst_matches[0].value == "false" else True + + # Slicer Type + if visual_type == "slicer": matches = parse( - "$.visual.visualContainerObjects.divider[0].properties.show.expr.Literal.Value" - ).find(visual_json) - divider = matches[0] if matches else "" - - # Row/Column Subtotals - if visual_type == "pivotTable": - cst_matches = parse( - "$.visual.objects.subTotals[0].properties.columnSubtotals.expr.Literal.Value" - ).find(visual_json) - rst_matches = parse( - "$.visual.objects.subTotals[0].properties.rowSubtotals.expr.Literal.Value" - ).find(visual_json) - - if cst_matches: - cst_value = False if cst_matches[0].value == "false" else True - - if rst_matches: - rst_value = False if rst_matches[0].value == "false" else True - - # Slicer Type - if visual_type == "slicer": - matches = parse( - "$.visual.objects.data[0].properties.mode.expr.Literal.Value" - ).find(visual_json) - slicer_type = matches[0].value[1:-1] if matches else "N/A" - - # Data Visual - is_data_visual = contains_key( - visual_json, - [ - "Aggregation", - "Column", - "Measure", - "HierarchyLevel", - "NativeVisualCalculation", - ], - ) + "$.visual.objects.data[0].properties.mode.expr.Literal.Value" + ).find(payload) + slicer_type = matches[0].value[1:-1] if matches else "N/A" + + # Data Visual + is_data_visual = contains_key( + payload, + [ + "Aggregation", + "Column", + "Measure", + "HierarchyLevel", + "NativeVisualCalculation", + ], + ) - # Sparkline - has_sparkline = contains_key(visual_json, ["SparklineData"]) + # Sparkline + has_sparkline = contains_key(payload, ["SparklineData"]) - new_data = { - "File Path": file_path, - "Page Name": page_id, - "Page Display Name": page_display, - "Visual Name": visual_json.get("name"), - "X": pos.get("x"), - "Y": pos.get("y"), - "Z": pos.get("z"), - "Width": pos.get("width"), - "Height": pos.get("height"), - "Tab Order": pos.get("tabOrder"), - "Hidden": visual_json.get("isHidden", False), - "Type": 
visual_type, - "Display Type": visual_type_display, - "Title": title, - "SubTitle": sub_title, - "Custom Visual": visual_type in custom_visuals, - "Alt Text": alt_text, - "Show Items With No Data": show_all_data, - "Divider": divider, - "Row SubTotals": rst_value, - "Column SubTotals": cst_value, - "Slicer Type": slicer_type, - "Data Visual": is_data_visual, - "Has Sparkline": has_sparkline, - "Visual Filter Count": visual_filter_count, - "Data Limit": data_limit, - } + new_data = { + "File Path": path, + "Page Name": page_id, + "Page Display Name": page_display, + "Visual Name": payload.get("name"), + "X": pos.get("x"), + "Y": pos.get("y"), + "Z": pos.get("z"), + "Width": pos.get("width"), + "Height": pos.get("height"), + "Tab Order": pos.get("tabOrder"), + "Hidden": payload.get("isHidden", False), + "Type": visual_type, + "Display Type": visual_type_display, + "Title": title, + "SubTitle": sub_title, + "Custom Visual": visual_type in custom_visuals, + "Alt Text": alt_text, + "Show Items With No Data": show_all_data, + "Divider": divider, + "Row SubTotals": rst_value, + "Column SubTotals": cst_value, + "Slicer Type": slicer_type, + "Data Visual": is_data_visual, + "Has Sparkline": has_sparkline, + "Visual Filter Count": visual_filter_count, + "Data Limit": data_limit, + } + dfs.append(pd.DataFrame(new_data, index=[0])) - df = pd.concat( - [df, pd.DataFrame(new_data, index=[0])], ignore_index=True - ) + if dfs: + df = pd.concat(dfs, ignore_index=True) grouped_df = ( self.list_visual_objects() @@ -928,8 +1316,9 @@ def list_visual_objects(self, extended: bool = False) -> pd.DataFrame: pandas.DataFrame A pandas dataframe containing a list of all semantic model objects used in each visual in the report. """ + self._ensure_pbir() - page_mapping, visual_mapping = helper.visual_page_mapping(self) + visual_mapping = self._visual_page_mapping() columns = { "Page Name": "str", @@ -966,17 +1355,25 @@ def find_entity_property_pairs(data, result=None, keys_path=None): keys_path = [] if isinstance(data, dict): + expression = data.get("Expression", {}) + source_ref = ( + expression.get("SourceRef", {}) + if isinstance(expression, dict) + else {} + ) if ( - "Entity" in data.get("Expression", {}).get("SourceRef", {}) + isinstance(source_ref, dict) + and "Entity" in source_ref and "Property" in data ): - entity = ( - data.get("Expression", {}) - .get("SourceRef", {}) - .get("Entity", {}) + entity = source_ref.get("Entity", "") + property_value = data.get("Property", "") + + object_type = ( + keys_path[-1].replace("HierarchyLevel", "Hierarchy") + if keys_path + else "Unknown" ) - property_value = data.get("Property", {}) - object_type = keys_path[-1].replace("HierarchyLevel", "Hierarchy") is_agg = keys_path[-3] == "Aggregation" is_viz_calc = keys_path[-3] == "NativeVisualCalculation" is_sparkline = keys_path[-3] == "SparklineData" @@ -987,7 +1384,8 @@ def find_entity_property_pairs(data, result=None, keys_path=None): is_viz_calc, is_sparkline, ) - keys_path.pop() + if keys_path: + keys_path.pop() # Recursively search the rest of the dictionary for key, value in data.items(): @@ -1000,59 +1398,58 @@ def find_entity_property_pairs(data, result=None, keys_path=None): return result - for _, r in self.rdef.iterrows(): - file_path = r["path"] - payload = r["payload"] - if file_path.endswith("/visual.json"): - visual_file = base64.b64decode(payload).decode("utf-8") - visual_json = json.loads(visual_file) - page_id = visual_mapping.get(file_path)[0] - page_display = visual_mapping.get(file_path)[1] - - 
entity_property_pairs = find_entity_property_pairs(visual_json) - query_state = ( - visual_json.get("visual", {}).get("query", {}).get("queryState", {}) - ) + dfs = [] + for v in self.__all_visuals(): + path = v.get("path") + payload = v.get("payload") + page_id = visual_mapping.get(path)[0] + page_display = visual_mapping.get(path)[1] - format_mapping = {} - obj_display_mapping = {} - for a, p in query_state.items(): - for proj in p.get("projections", []): - query_ref = proj.get("queryRef") - fmt = proj.get("format") - obj_display_name = proj.get("displayName") - if fmt is not None: - format_mapping[query_ref] = fmt - obj_display_mapping[query_ref] = obj_display_name + entity_property_pairs = find_entity_property_pairs(payload) + query_state = ( + payload.get("visual", {}).get("query", {}).get("queryState", {}) + ) - for object_name, properties in entity_property_pairs.items(): - table_name = properties[0] - obj_full = f"{table_name}.{object_name}" - is_agg = properties[2] - format_value = format_mapping.get(obj_full) - obj_display = obj_display_mapping.get(obj_full) - - if is_agg: - for k, v in format_mapping.items(): - if obj_full in k: - format_value = v - new_data = { - "Page Name": page_id, - "Page Display Name": page_display, - "Visual Name": visual_json.get("name"), - "Table Name": table_name, - "Object Name": object_name, - "Object Type": properties[1], - "Implicit Measure": is_agg, - "Sparkline": properties[4], - "Visual Calc": properties[3], - "Format": format_value, - "Object Display Name": obj_display, - } + format_mapping = {} + obj_display_mapping = {} + for a, p in query_state.items(): + for proj in p.get("projections", []): + query_ref = proj.get("queryRef") + fmt = proj.get("format") + obj_display_name = proj.get("displayName") + if fmt is not None: + format_mapping[query_ref] = fmt + obj_display_mapping[query_ref] = obj_display_name + + for object_name, properties in entity_property_pairs.items(): + table_name = properties[0] + obj_full = f"{table_name}.{object_name}" + is_agg = properties[2] + format_value = format_mapping.get(obj_full) + obj_display = obj_display_mapping.get(obj_full) + + if is_agg: + for k, v in format_mapping.items(): + if obj_full in k: + format_value = v + new_data = { + "Page Name": page_id, + "Page Display Name": page_display, + "Visual Name": payload.get("name"), + "Table Name": table_name, + "Object Name": object_name, + "Object Type": properties[1], + "Implicit Measure": is_agg, + "Sparkline": properties[4], + "Visual Calc": properties[3], + "Format": format_value, + "Object Display Name": obj_display, + } - df = pd.concat( - [df, pd.DataFrame(new_data, index=[0])], ignore_index=True - ) + dfs.append(pd.DataFrame(new_data, index=[0])) + + if dfs: + df = pd.concat(dfs, ignore_index=True) if extended: df = self._add_extended(dataframe=df) @@ -1077,6 +1474,7 @@ def list_semantic_model_objects(self, extended: bool = False) -> pd.DataFrame: pandas.DataFrame A pandas dataframe showing the semantic model objects used in the report. 
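# Illustrative sketch: the shape find_entity_property_pairs walks. Field references in a PBIR
# visual payload look like {"Expression": {"SourceRef": {"Entity": <table>}}, "Property": <object>},
# nested at arbitrary depth. A minimal recursive collector over an invented fragment:
def collect_fields(node, found=None):
    if found is None:
        found = []
    if isinstance(node, dict):
        expression = node.get("Expression")
        source_ref = expression.get("SourceRef", {}) if isinstance(expression, dict) else {}
        if isinstance(source_ref, dict) and "Entity" in source_ref and "Property" in node:
            found.append((source_ref["Entity"], node["Property"]))
        for value in node.values():
            collect_fields(value, found)
    elif isinstance(node, list):
        for item in node:
            collect_fields(item, found)
    return found

fragment = {
    "visual": {
        "query": {
            "queryState": {
                "Values": {
                    "projections": [
                        {"field": {"Measure": {"Expression": {"SourceRef": {"Entity": "Sales"}}, "Property": "Total Sales"}}}
                    ]
                }
            }
        }
    }
}
print(collect_fields(fragment))  # [('Sales', 'Total Sales')]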
""" + self._ensure_pbir() from sempy_labs.tom import connect_semantic_model @@ -1096,7 +1494,7 @@ def list_semantic_model_objects(self, extended: bool = False) -> pd.DataFrame: rf_subset = rf[["Table Name", "Object Name", "Object Type"]].copy() rf_subset["Report Source"] = "Report Filter" - rf_subset["Report Source Object"] = self._report + rf_subset["Report Source Object"] = self._report_name pf_subset = pf[ ["Table Name", "Object Name", "Object Type", "Page Display Name"] @@ -1140,9 +1538,9 @@ def list_semantic_model_objects(self, extended: bool = False) -> pd.DataFrame: ) if extended: - dataset_id, dataset_name, dataset_workspace_id, dataset_workspace_name = ( + (dataset_id, dataset_name, dataset_workspace_id, dataset_workspace_name) = ( resolve_dataset_from_report( - report=self._report, workspace=self._workspace_id + report=self._report_id, workspace=self._workspace_id ) ) @@ -1186,7 +1584,7 @@ def _list_all_semantic_model_objects(self): ) dataset_id, dataset_name, dataset_workspace_id, dataset_workspace_name = ( resolve_dataset_from_report( - report=self._report, workspace=self._workspace_id + report=self._report_id, workspace=self._workspace_id ) ) dep = get_measure_dependencies( @@ -1221,8 +1619,7 @@ def list_bookmarks(self) -> pd.DataFrame: pandas.DataFrame A pandas dataframe containing a list of all bookmarks in the report. """ - - rd = self.rdef + self._ensure_pbir() columns = { "File Path": "str", @@ -1235,31 +1632,34 @@ def list_bookmarks(self) -> pd.DataFrame: } df = _create_dataframe(columns=columns) - bookmark_rows = rd[rd["path"].str.endswith(".bookmark.json")] + bookmarks = [ + o + for o in self._report_definition.get("parts") + if o.get("path").endswith("/bookmark.json") + ] - for _, r in bookmark_rows.iterrows(): - path = r["path"] - payload = r["payload"] + dfs = [] - obj_file = base64.b64decode(payload).decode("utf-8") - obj_json = json.loads(obj_file) + for b in bookmarks: + path = b.get("path") + payload = b.get("payload") - bookmark_name = obj_json.get("name") - bookmark_display = obj_json.get("displayName") - rpt_page_id = obj_json.get("explorationState", {}).get("activeSection") - page_id, page_display, file_path = helper.resolve_page_name( - self, page_name=rpt_page_id + bookmark_name = payload.get("name") + bookmark_display = payload.get("displayName") + rpt_page_id = payload.get("explorationState", {}).get("activeSection") + (page_id, page_display) = self._resolve_page_name_and_display_name( + rpt_page_id ) - for rptPg in obj_json.get("explorationState", {}).get("sections", {}): + for rptPg in payload.get("explorationState", {}).get("sections", {}): for visual_name in ( - obj_json.get("explorationState", {}) + payload.get("explorationState", {}) .get("sections", {}) .get(rptPg, {}) .get("visualContainers", {}) ): if ( - obj_json.get("explorationState", {}) + payload.get("explorationState", {}) .get("sections", {}) .get(rptPg, {}) .get("visualContainers", {}) @@ -1282,9 +1682,10 @@ def list_bookmarks(self) -> pd.DataFrame: "Visual Name": visual_name, "Visual Hidden": visual_hidden, } - df = pd.concat( - [df, pd.DataFrame(new_data, index=[0])], ignore_index=True - ) + dfs.append(pd.DataFrame(new_data, index=[0])) + + if dfs: + df = pd.concat(dfs, ignore_index=True) _update_dataframe_datatypes(dataframe=df, column_map=columns) @@ -1303,114 +1704,99 @@ def list_report_level_measures(self) -> pd.DataFrame: A pandas dataframe containing a list of all report-level measures in the report. 
""" + self._ensure_pbir() + columns = { "Measure Name": "str", "Table Name": "str", "Expression": "str", "Data Type": "str", "Format String": "str", + "Data Category": "str", } df = _create_dataframe(columns=columns) - rd_filt = self.rdef[self.rdef["path"] == "definition/reportExtensions.json"] + # If no report extensions path, return empty DataFrame + if self._report_extensions_path not in self.list_paths()["Path"].values: + return df - if len(rd_filt) == 1: - payload = rd_filt["payload"].iloc[0] - obj_file = base64.b64decode(payload).decode("utf-8") - obj_json = json.loads(obj_file) + report_file = self.get(file_path=self._report_extensions_path) - for e in obj_json.get("entities", []): - table_name = e.get("name") - for m in e.get("measures", []): - measure_name = m.get("name") - expr = m.get("expression") - data_type = m.get("dataType") - format_string = m.get("formatString") + dfs = [] + for e in report_file.get("entities", []): + table_name = e.get("name") + for m in e.get("measures", []): + measure_name = m.get("name") + expr = m.get("expression") + data_type = m.get("dataType") + format_string = m.get("formatString") + data_category = m.get("dataCategory") - new_data = { - "Measure Name": measure_name, - "Table Name": table_name, - "Expression": expr, - "Data Type": data_type, - "Format String": format_string, - } - df = pd.concat( - [df, pd.DataFrame(new_data, index=[0])], ignore_index=True - ) + new_data = { + "Measure Name": measure_name, + "Table Name": table_name, + "Expression": expr, + "Data Type": data_type, + "Format String": format_string, + "Data Category": data_category, + } + dfs.append(pd.DataFrame(new_data, index=[0])) + + if dfs: + df = pd.concat(dfs, ignore_index=True) return df - def _list_annotations(self) -> pd.DataFrame: + def get_theme(self, theme_type: str = "baseTheme") -> dict: """ - Shows a list of annotations in the report. + Obtains the theme file of the report. + + Parameters + ---------- + theme_type : str, default="baseTheme" + The theme type. Options: "baseTheme", "customTheme". Returns ------- - pandas.DataFrame - A pandas dataframe showing a list of report, page and visual annotations in the report. 
+ dict + The theme.json file """ - columns = { - "Type": "str", - "Object Name": "str", - "Annotation Name": "str", - "Annotation Value": "str", - } - df = _create_dataframe(columns=columns) + self._ensure_pbir() - page_mapping, visual_mapping = helper.visual_page_mapping(self) - for _, r in self.rdef.iterrows(): - payload = r["payload"] - path = r["path"] - if path == "definition/report.json": - file = _decode_b64(payload) - json_file = json.loads(file) - if "annotations" in json_file: - for ann in json_file["annotations"]: - new_data = { - "Type": "Report", - "Object Name": self._report, - "Annotation Name": ann.get("name"), - "Annotation Value": ann.get("value"), - } - df = pd.concat( - [df, pd.DataFrame(new_data, index=[0])], ignore_index=True - ) - elif path.endswith("/page.json"): - file = _decode_b64(payload) - json_file = json.loads(file) - if "annotations" in json_file: - for ann in json_file["annotations"]: - new_data = { - "Type": "Page", - "Object Name": json_file.get("displayName"), - "Annotation Name": ann.get("name"), - "Annotation Value": ann.get("value"), - } - df = pd.concat( - [df, pd.DataFrame(new_data, index=[0])], ignore_index=True - ) - elif path.endswith("/visual.json"): - file = _decode_b64(payload) - json_file = json.loads(file) - page_display = visual_mapping.get(path)[1] - visual_name = json_file.get("name") - if "annotations" in json_file: - for ann in json_file["annotations"]: - new_data = { - "Type": "Visual", - "Object Name": f"'{page_display}'[{visual_name}]", - "Annotation Name": ann.get("name"), - "Annotation Value": ann.get("value"), - } - df = pd.concat( - [df, pd.DataFrame(new_data, index=[0])], ignore_index=True - ) + theme_types = ["baseTheme", "customTheme"] + theme_type = theme_type.lower() - return df + if "custom" in theme_type: + theme_type = "customTheme" + elif "base" in theme_type: + theme_type = "baseTheme" + if theme_type not in theme_types: + raise ValueError( + f"{icons.red_dot} Invalid theme type. Valid options: {theme_types}." + ) - # Automation functions + report_file = self.get(file_path=self._report_file_path) + theme_collection = report_file.get("themeCollection", {}) + if theme_type not in theme_collection: + raise ValueError( + f"{icons.red_dot} The {self._report} report within the '{self._workspace_name} workspace has no custom theme." + ) + ct = theme_collection.get(theme_type) + theme_name = ct["name"] + theme_location = ct["type"] + theme_file_path = f"StaticResources/{theme_location}/{theme_name}" + if theme_type == "baseTheme": + theme_file_path = ( + f"StaticResources/{theme_location}/BaseThemes/{theme_name}" + ) + if not theme_file_path.endswith(".json"): + theme_file_path = f"{theme_file_path}.json" + + return self.get(file_path=theme_file_path) + + # Action functions def set_theme(self, theme_file_path: str): """ Sets a custom theme for a report based on a theme .json file. @@ -1423,98 +1809,85 @@ def set_theme(self, theme_file_path: str): Example for web url: file_path = 'https://raw.githubusercontent.com/PowerBiDevCamp/FabricUserApiDemo/main/FabricUserApiDemo/DefinitionTemplates/Shared/Reports/StaticResources/SharedResources/BaseThemes/CY23SU08.json' """ - report_path = "definition/report.json" - theme_version = "5.5.4" - request_body = {"definition": {"parts": []}} + self._ensure_pbir() + theme_version = "5.6.4" + # Open file if not theme_file_path.endswith(".json"): raise ValueError( f"{icons.red_dot} The '{theme_file_path}' theme file path must be a .json file." 
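# Illustrative sketch: the Fabric item-definition API exchanges part payloads as base64
# strings, which is why the older code in this file base64-decodes before json.loads and why
# the new code funnels writes through a decode_payload helper (defined outside this hunk,
# behaviour assumed). Round-trip with the standard library only:
import base64
import json

theme = {"name": "CY23SU08", "dataColors": ["#118DFF", "#12239E"]}
encoded = base64.b64encode(json.dumps(theme).encode("utf-8")).decode("utf-8")
decoded = json.loads(base64.b64decode(encoded).decode("utf-8"))
assert decoded == theme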
) elif theme_file_path.startswith("https://"): response = requests.get(theme_file_path) - json_file = response.json() - elif theme_file_path.startswith("/lakehouse"): + theme_file = response.json() + elif theme_file_path.startswith("/lakehouse") or theme_file_path.startswith( + "/synfs/" + ): with open(theme_file_path, "r", encoding="utf-8-sig") as file: - json_file = json.load(file) + theme_file = json.load(file) else: ValueError( f"{icons.red_dot} Incorrect theme file path value '{theme_file_path}'." ) - theme_name = json_file["name"] + theme_name = theme_file.get("name") theme_name_full = f"{theme_name}.json" - rd = self.rdef - # Add theme.json file to request_body - file_payload = _conv_b64(json_file) - filePath = f"StaticResources/RegisteredResources/{theme_name_full}" + # Add theme.json file + self.add( + file_path=f"StaticResources/RegisteredResources/{theme_name_full}", + payload=theme_file, + ) + + custom_theme = { + "name": theme_name_full, + "reportVersionAtImport": theme_version, + "type": "RegisteredResources", + } - _add_part(request_body, filePath, file_payload) + self.set_json( + file_path=self._report_file_path, + json_path="$.themeCollection.customTheme", + json_value=custom_theme, + ) - new_theme = { + # Update + report_file = self.get( + file_path=self._report_file_path, json_path="$.resourcePackages" + ) + new_item = { "name": theme_name_full, "path": theme_name_full, "type": "CustomTheme", } + # Find or create RegisteredResources + registered = next( + (res for res in report_file if res["name"] == "RegisteredResources"), None + ) - for _, r in rd.iterrows(): - path = r["path"] - payload = r["payload"] - if path == filePath: - pass - elif path != report_path: - _add_part(request_body, path, payload) - # Update the report.json file - else: - rptFile = base64.b64decode(payload).decode("utf-8") - rptJson = json.loads(rptFile) - resource_type = "RegisteredResources" - - # Add to theme collection - if "customTheme" not in rptJson["themeCollection"]: - rptJson["themeCollection"]["customTheme"] = { - "name": theme_name_full, - "reportVersionAtImport": theme_version, - "type": resource_type, - } - else: - rptJson["themeCollection"]["customTheme"]["name"] = theme_name_full - rptJson["themeCollection"]["customTheme"]["type"] = resource_type - - for package in rptJson["resourcePackages"]: - package["items"] = [ - item - for item in package["items"] - if item["type"] != "CustomTheme" - ] - - if not any( - package["name"] == resource_type - for package in rptJson["resourcePackages"] - ): - new_registered_resources = { - "name": resource_type, - "type": resource_type, - "items": [new_theme], - } - rptJson["resourcePackages"].append(new_registered_resources) - else: - names = [ - rp["name"] for rp in rptJson["resourcePackages"][1]["items"] - ] - - if theme_name_full not in names: - rptJson["resourcePackages"][1]["items"].append(new_theme) - - file_payload = _conv_b64(rptJson) - _add_part(request_body, path, file_payload) - - self.update_report(request_body=request_body) - print( - f"{icons.green_dot} The '{theme_name}' theme has been set as the theme for the '{self._report}' report within the '{self._workspace_name}' workspace." 
+ if not registered: + registered = { + "name": "RegisteredResources", + "type": "RegisteredResources", + "items": [new_item], + } + report_file.append(registered) + else: + # Check for duplicate by 'name' + if all(item["name"] != new_item["name"] for item in registered["items"]): + registered["items"].append(new_item) + + self.set_json( + file_path=self._report_file_path, + json_path="$.resourcePackages", + json_value=report_file, ) + if not self._readonly: + print( + f"{icons.green_dot} The '{theme_name}' theme has been set as the theme for the '{self._report_name}' report within the '{self._workspace_name}' workspace." + ) + def set_active_page(self, page_name: str): """ Sets the active page (first page displayed when opening a report) for a report. @@ -1524,25 +1897,22 @@ def set_active_page(self, page_name: str): page_name : str The page name or page display name of the report. """ + self._ensure_pbir() - pages_file = "definition/pages/pages.json" - page_id, page_display_name, file_path = helper.resolve_page_name( - self, page_name=page_name + (page_id, page_display_name) = self._resolve_page_name_and_display_name( + page_name ) - - pagePath = self.rdef[self.rdef["path"] == pages_file] - payload = pagePath["payload"].iloc[0] - page_file = _decode_b64(payload) - json_file = json.loads(page_file) - json_file["activePageName"] = page_id - file_payload = _conv_b64(json_file) - - self._update_single_file(file_name=pages_file, new_payload=file_payload) - - print( - f"{icons.green_dot} The '{page_display_name}' page has been set as the active page in the '{self._report}' report within the '{self._workspace_name}' workspace." + self.set_json( + file_path=self._pages_file_path, + json_path="$.activePageName", + json_value=page_id, ) + if not self._readonly: + print( + f"{icons.green_dot} The '{page_display_name}' page has been set as the active page in the '{self._report_name}' report within the '{self._workspace_name}' workspace." + ) + def set_page_type(self, page_name: str, page_type: str): """ Changes the page type of a report page. @@ -1554,6 +1924,7 @@ def set_page_type(self, page_name: str, page_type: str): page_type : str The page type. Valid page types: 'Tooltip', 'Letter', '4:3', '16:9'. """ + self._ensure_pbir() if page_type not in helper.page_types: raise ValueError( @@ -1575,69 +1946,122 @@ def set_page_type(self, page_name: str, page_type: str): f"{icons.red_dot} Invalid page_type parameter. Valid options: ['Tooltip', 'Letter', '4:3', '16:9']." ) - page_id, page_display_name, file_path = helper.resolve_page_name( - self, page_name=page_name + (file_path, page_id, page_display_name) = ( + self.__resolve_page_name_and_display_name_file_path(page_name) ) - rd_filt = self.rdef[self.rdef["path"] == file_path] - payload = rd_filt["payload"].iloc[0] - page_file = _decode_b64(payload) - json_file = json.loads(page_file) - json_file["width"] = width - json_file["height"] = height - file_payload = _conv_b64(json_file) - - self._update_single_file(file_name=file_path, new_payload=file_payload) - - print( - f"{icons.green_dot} The '{page_display_name}' page has been updated to the '{page_type}' page type." + + self.set_json(file_path=file_path, json_path="$.width", json_value=width) + self.set_json(file_path=file_path, json_path="$.height", json_value=height) + + if not self._readonly: + print( + f"{icons.green_dot} The '{page_display_name}' page has been updated to the '{page_type}' page type." 
+ ) + + # def set_page_vertical_alignment(self, page: str, vertical_alignment: Literal["Top", "Middle"] = "Top"): + + def set_page_visibility(self, page_name: str, hidden: bool): + """ + Sets whether a report page is visible or hidden. + + Parameters + ---------- + page_name : str + The page name or page display name of the report. + hidden : bool + If set to True, hides the report page. + If set to False, makes the report page visible. + """ + self._ensure_pbir() + (file_path, page_id, page_display_name) = ( + self.__resolve_page_name_and_display_name_file_path(page_name) ) - def remove_unnecessary_custom_visuals(self): + if hidden: + self.set_json( + file_path=file_path, + json_path="$.visibility", + json_value="HiddenInViewMode", + ) + else: + self.remove(file_path=file_path, json_path="$.visibility", verbose=False) + + visibility = "visible" if hidden is False else "hidden" + + if not self._readonly: + print( + f"{icons.green_dot} The '{page_display_name}' page has been set to '{visibility}' in the '{self._report_name}' report within the '{self._workspace_name}' workspace." + ) + + def hide_tooltip_drillthrough_pages(self): """ - Removes any custom visuals within the report that are not used in the report. + Hides all tooltip pages and drillthrough pages in a report. """ - dfCV = self.list_custom_visuals() - dfV = self.list_visuals() - rd = self.rdef - cv_remove = [] - cv_remove_display = [] - request_body = {"definition": {"parts": []}} - - for _, r in dfCV.iterrows(): - cv = r["Custom Visual Name"] - cv_display = r["Custom Visual Display Name"] - dfV_filt = dfV[dfV["Type"] == cv] - if len(dfV_filt) == 0: - cv_remove.append(cv) # Add to the list for removal - cv_remove_display.append(cv_display) - if len(cv_remove) == 0: + dfP = self.list_pages() + dfP_filt = dfP[ + (dfP["Type"] == "Tooltip") | (dfP["Drillthrough Target Page"] == True) + ] + + if dfP_filt.empty: print( - f"{icons.green_dot} There are no unnecessary custom visuals in the '{self._report}' report within the '{self._workspace_name}' workspace." + f"{icons.green_dot} There are no Tooltip or Drillthrough pages in the '{self._report_name}' report within the '{self._workspace_name}' workspace." ) return - for _, r in rd.iterrows(): - file_path = r["path"] - payload = r["payload"] - if file_path == "definition/report.json": - rpt_file = base64.b64decode(payload).decode("utf-8") - rpt_json = json.loads(rpt_file) - rpt_json["publicCustomVisuals"] = [ - item - for item in rpt_json["publicCustomVisuals"] - if item not in cv_remove - ] + for _, r in dfP_filt.iterrows(): + page_name = r["Page Name"] + self.set_page_visibility(page_name=page_name, hidden=True) + + def disable_show_items_with_no_data(self): + """ + Disables the `show items with no data `_ property in all visuals within the report. + """ + + self.remove( + file_path="definition/pages/*/visual.json", + json_path="$..showAll", + verbose=False, + ) - payload = _conv_b64(rpt_json) + if not self._readonly: + print( + f"{icons.green_dot} Show items with data has been disabled for all visuals in the '{self._report_name}' report within the '{self._workspace_name}' workspace." + ) + + def remove_unnecessary_custom_visuals(self): + """ + Removes any custom visuals within the report that are not used in the report. 
+ """ + + dfCV = self.list_custom_visuals() + df = dfCV[dfCV["Used in Report"] == False] - _add_part(request_body, file_path, payload) + if not df.empty: + cv_remove = df["Custom Visual Name"].values() + cv_remove_display = df["Custom Visual Display Name"].values() + else: + print( + f"{icons.red_dot} There are no unnecessary custom visuals in the '{self._report_name}' report within the '{self._workspace_name}' workspace." + ) + return - self.update_report(request_body=request_body) - print( - f"{icons.green_dot} The {cv_remove_display} custom visuals have been removed from the '{self._report}' report within the '{self._workspace_name}' workspace." + json_path = "$.publicCustomVisuals" + custom_visuals = self.get(file_path=self._report_file_path, json_path=json_path) + updated_custom_visuals = [ + item for item in custom_visuals if item not in cv_remove + ] + self.set_json( + file_path=self._report_path, + json_path=json_path, + json_value=updated_custom_visuals, ) + if not self._readonly: + print( + f"{icons.green_dot} The {cv_remove_display} custom visuals have been removed from the '{self._report_name}' report within the '{self._workspace_name}' workspace." + ) + def migrate_report_level_measures(self, measures: Optional[str | List[str]] = None): """ Moves all report-level measures from the report to the semantic model on which the report is based. @@ -1648,555 +2072,883 @@ def migrate_report_level_measures(self, measures: Optional[str | List[str]] = No A measure or list of measures to move to the semantic model. Defaults to None which resolves to moving all report-level measures to the semantic model. """ + self._ensure_pbir() from sempy_labs.tom import connect_semantic_model rlm = self.list_report_level_measures() - if len(rlm) == 0: + if rlm.empty: print( - f"{icons.green_dot} The '{self._report}' report within the '{self._workspace_name}' workspace has no report-level measures." + f"{icons.info} The '{self._report_name}' report within the '{self._workspace_name}' workspace has no report-level measures." 
) return dataset_id, dataset_name, dataset_workspace_id, dataset_workspace_name = ( resolve_dataset_from_report( - report=self._report, workspace=self._workspace_id + report=self._report_id, workspace=self._workspace_id ) ) if isinstance(measures, str): measures = [measures] - request_body = {"definition": {"parts": []}} - rpt_file = "definition/reportExtensions.json" - - rd = self.rdef - rd_filt = rd[rd["path"] == rpt_file] - payload = rd_filt["payload"].iloc[0] - extFile = base64.b64decode(payload).decode("utf-8") - extJson = json.loads(extFile) + file = self.get(file_path=self._report_extensions_path) mCount = 0 with connect_semantic_model( dataset=dataset_id, readonly=False, workspace=dataset_workspace_id ) as tom: + existing_measures = [m.Name for m in tom.all_measures()] for _, r in rlm.iterrows(): - tableName = r["Table Name"] - mName = r["Measure Name"] - mExpr = r["Expression"] + table_name = r["Table Name"] + measure_name = r["Measure Name"] + expr = r["Expression"] # mDataType = r["Data Type"] - mformatString = r["Format String"] + format_string = r["Format String"] # Add measures to the model - if mName in measures or measures is None: + if ( + measure_name in measures or measures is None + ) and measure_name not in existing_measures: tom.add_measure( - table_name=tableName, - measure_name=mName, - expression=mExpr, - format_string=mformatString, + table_name=table_name, + measure_name=measure_name, + expression=expr, + format_string=format_string, ) tom.set_annotation( - object=tom.model.Tables[tableName].Measures[mName], + object=tom.model.Tables[table_name].Measures[measure_name], name="semanticlinklabs", value="reportlevelmeasure", ) mCount += 1 # Remove measures from the json if measures is not None and len(measures) < mCount: - for e in extJson["entities"]: + for e in file["entities"]: e["measures"] = [ measure for measure in e["measures"] if measure["name"] not in measures ] - extJson["entities"] = [ - entity for entity in extJson["entities"] if entity["measures"] + file["entities"] = [ + entity for entity in file["entities"] if entity["measures"] ] - file_payload = _conv_b64(extJson) - _add_part(request_body, rpt_file, file_payload) - - # Add unchanged payloads - for _, r in rd.iterrows(): - path = r["path"] - payload = r["payload"] - if path != rpt_file: - _add_part(request_body, path, payload) - - self.update_report(request_body=request_body) - print( - f"{icons.green_dot} The report-level measures have been migrated to the '{dataset_name}' semantic model within the '{dataset_workspace_name}' workspace." - ) + self.update(file_path=self._report_extensions_path, payload=file) + # what about if measures is None? - def set_page_visibility(self, page_name: str, hidden: bool): + if not self._readonly: + print( + f"{icons.green_dot} The report-level measures have been migrated to the '{dataset_name}' semantic model within the '{dataset_workspace_name}' workspace." + ) + + # In progress... + def _list_annotations(self) -> pd.DataFrame: """ - Sets whether a report page is visible or hidden. + Shows a list of annotations in the report. - Parameters - ---------- - page_name : str - The page name or page display name of the report. - hidden : bool - If set to True, hides the report page. - If set to False, makes the report page visible. + Returns + ------- + pandas.DataFrame + A pandas dataframe showing a list of report, page and visual annotations in the report. 
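# Illustrative sketch for migrate_report_level_measures; it writes the measures
# into the model via connect_semantic_model (as the implementation above does)
# and removes them from reportExtensions.json. Placeholder names; the
# connect_report import path is assumed.
from sempy_labs.report import connect_report  # assumed export location

with connect_report(report="Sales", workspace="Analytics", readonly=False) as rpt:
    rpt.migrate_report_level_measures()  # move all report-level measures
    # or only a subset:
    # rpt.migrate_report_level_measures(measures=["Total Sales"])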
""" - page_id, page_display_name, file_path = helper.resolve_page_name( - self, page_name=page_name - ) - visibility = "visible" if hidden is False else "hidden" + columns = { + "Type": "str", + "Object Name": "str", + "Annotation Name": "str", + "Annotation Value": "str", + } + df = _create_dataframe(columns=columns) - rd_filt = self.rdef[self.rdef["path"] == file_path] - payload = rd_filt["payload"].iloc[0] - obj_file = _decode_b64(payload) - obj_json = json.loads(obj_file) - if hidden: - obj_json["visibility"] = "HiddenInViewMode" - else: - if "visibility" in obj_json: - del obj_json["visibility"] - new_payload = _conv_b64(obj_json) + visual_mapping = self._visual_page_mapping() + report_file = self.get(file_path="definition/report.json") + + dfs = [] + if "annotations" in report_file: + for ann in report_file["annotations"]: + new_data = { + "Type": "Report", + "Object Name": self._report_name, + "Annotation Name": ann.get("name"), + "Annotation Value": ann.get("value"), + } + dfs.append(pd.DataFrame(new_data, index=[0])) + + for p in self.__all_pages(): + path = p.get("path") + payload = p.get("payload") + page_name = payload.get("displayName") + if "annotations" in payload: + for ann in payload["annotations"]: + new_data = { + "Type": "Page", + "Object Name": page_name, + "Annotation Name": ann.get("name"), + "Annotation Value": ann.get("value"), + } + dfs.append(pd.DataFrame(new_data, index=[0])) + + for v in self.__all_visuals(): + path = v.get("path") + payload = v.get("payload") + page_display = visual_mapping.get(path)[1] + visual_name = payload.get("name") + if "annotations" in payload: + for ann in payload["annotations"]: + new_data = { + "Type": "Visual", + "Object Name": f"'{page_display}'[{visual_name}]", + "Annotation Name": ann.get("name"), + "Annotation Value": ann.get("value"), + } + dfs.append(pd.DataFrame(new_data, index=[0])) - self._update_single_file(file_name=file_path, new_payload=new_payload) + if dfs: + df = pd.concat(dfs, ignore_index=True) - print( - f"{icons.green_dot} The '{page_display_name}' page has been set to {visibility}." - ) + return df - def hide_tooltip_drillthrough_pages(self): + def _add_image(self, image_path: str, resource_name: Optional[str] = None) -> str: """ - Hides all tooltip pages and drillthrough pages in a report. + Add an image to the report definition. The image will be added to the StaticResources/RegisteredResources folder in the report definition. If the image_name already exists as a file in the report definition it will be updated. + + Parameters + ---------- + image_path : str + The path of the image file to be added. For example: "./builtin/MyImage.png". + resource_name : str, default=None + The name of the image file to be added. For example: "MyImage.png". If not specified, the name will be derived from the image path and a unique ID will be appended to it. + + Returns + ------- + str + The name of the image file added to the report definition. """ + self._ensure_pbir() - dfP = self.list_pages() - dfP_filt = dfP[ - (dfP["Type"] == "Tooltip") | (dfP["Drillthrough Target Page"] == True) - ] + id = generate_number_guid() - if len(dfP_filt) == 0: - print( - f"{icons.green_dot} There are no Tooltip or Drillthrough pages in the '{self._report}' report within the '{self._workspace_name}' workspace." 
+ if image_path.startswith("http://") or image_path.startswith("https://"): + response = requests.get(image_path) + response.raise_for_status() + image_bytes = response.content + # Extract the suffix (extension) from the URL path + suffix = Path(urlparse(image_path).path).suffix + else: + with open(image_path, "rb") as image_file: + image_bytes = image_file.read() + suffix = Path(image_path).suffix + + payload = base64.b64encode(image_bytes).decode("utf-8") + if resource_name is None: + resource_name = os.path.splitext(os.path.basename(image_path))[0] + file_name = f"{resource_name}{id}{suffix}" + else: + file_name = resource_name + file_path = f"StaticResources/RegisteredResources/{file_name}" + + # Add StaticResources/RegisteredResources file. If the file already exists, update it. + try: + self.get(file_path=file_path) + self.update(file_path=file_path, payload=payload) + except Exception: + self.add( + file_path=file_path, + payload=payload, ) - return - for _, r in dfP_filt.iterrows(): - page_name = r["Page Name"] - self.set_page_visibility(page_name=page_name, hidden=True) + # Add to report.json file + self.__add_to_registered_resources( + name=file_name, + path=file_name, + type="Image", + ) - def disable_show_items_with_no_data(self): - """ - Disables the `show items with no data `_ property in all visuals within the report. - """ + return file_name - request_body = {"definition": {"parts": []}} - - def delete_key_in_json(obj, key_to_delete): - if isinstance(obj, dict): - if key_to_delete in obj: - del obj[key_to_delete] - for key, value in obj.items(): - delete_key_in_json(value, key_to_delete) - elif isinstance(obj, list): - for item in obj: - delete_key_in_json(item, key_to_delete) - - rd = self.rdef - for _, r in rd.iterrows(): - file_path = r["path"] - payload = r["payload"] - if file_path.endswith("/visual.json"): - objFile = base64.b64decode(payload).decode("utf-8") - objJson = json.loads(objFile) - delete_key_in_json(objJson, "showAll") - _add_part(request_body, file_path, _conv_b64(objJson)) - else: - _add_part(request_body, file_path, payload) + def _remove_wallpaper(self, page: Optional[str | List[str]] = None): + """ + Remove the wallpaper image from a page. - self.update_report(request_body=request_body) - print( - f"{icons.green_dot} Show items with data has been disabled for all visuals in the '{self._report}' report within the '{self._workspace_name}' workspace." - ) + Parameters + ---------- + page : str | List[str], default=None + The name or display name of the page(s) from which the wallpaper image will be removed. + If None, removes from all pages. 
+ """ + self._ensure_pbir() - # Set Annotations - def __set_annotation(self, json_file: dict, name: str, value: str) -> dict: + if isinstance(page, str): + page = [page] - if "annotations" in json_file: - if any( - annotation["name"] == name for annotation in json_file["annotations"] - ): - for annotation in json_file["annotations"]: - if annotation["name"] == name: - annotation["value"] = value - break - else: - json_file["annotations"].append({"name": name, "value": value}) + page_list = [] + if page: + for p in page: + page_id = self.resolve_page_name(p) + page_list.append(page_id) else: - json_file["annotations"] = [] - json_file["annotations"].append({"name": name, "value": value}) + page_list = [ + p.get("payload", {}).get("name") + for p in self.__all_pages() + if p.get("payload") and "name" in p["payload"] + ] - return json_file + for p in self.__all_pages(): + path = p.get("path") + payload = p.get("payload") + page_name = payload.get("name") + page_display_name = payload.get("displayName") + if page_name in page_list: + self.remove(file_path=path, json_path="$.objects.outspace") + print( + f"{icons.green_dot} The wallpaper has been removed from the '{page_display_name}' page." + ) - def _set_annotation( + def _set_wallpaper_color( self, - annotation_name: str, - annotation_value: str, - page_name: Optional[str] = None, - visual_name: Optional[str] = None, + color_value: str, + page: Optional[str | List[str]] = None, + transparency: int = 0, + theme_color_percent: float = 0.0, ): """ - Sets an annotation on the report/page/visual. If the annotation already exists, the annotation value is updated. - In order to set a report annotation, leave page_name=None, visual_name=None. - In order to set a page annotation, leave visual_annotation=None. - In order to set a visual annotation, set all parameters. + Set the wallpaper color of a page (or pages). Parameters ---------- - annotation_name : str - Name of the annotation. - annotation_value : str - Value of the annotation. - page_name : str, default=None - The page name or page display name. - Set this annotation when setting an annotation on a page or visual. - visual_name : str, default=None - The visual name. - Set this property when setting an annotation on a visual. - """ - - if page_name is None and visual_name is None: - file_path = "definition/report.json" - elif page_name is not None and visual_name is None: - page_id, page_display, file_path = helper.resolve_page_name( - self, page_name=page_name - ) - elif page_name is not None and visual_name is not None: - page_name, page_display_name, visual_name, file_path = ( - helper.resolve_visual_name( - self, page_name=page_name, visual_name=visual_name - ) - ) - else: + color_value : str + The color value to be set. This can be a hex color code (e.g., "#FF5733") or an integer based on the theme color. + page : str | List[str], default=None + The name or display name of the page(s) to which the wallpaper color will be applied. + If None, applies to all pages. + transparency : int, default=0 + The transparency level of the wallpaper color. Valid values are between 0 and 100. + theme_color_percent : float, default=0.0 + The percentage of the theme color to be applied. Valid values are between -0.6 and 0.6. + """ + self._ensure_pbir() + + if transparency < 0 or transparency > 100: + raise ValueError(f"{icons.red_dot} Transparency must be between 0 and 100.") + + if theme_color_percent < -0.6 or theme_color_percent > 0.6: raise ValueError( - f"{icons.red_dot} Invalid parameters. 
If specifying a visual_name you must specify the page_name." + f"{icons.red_dot} Theme color percentage must be between -0.6 and 0.6." ) - payload = self.rdef[self.rdef["path"] == file_path]["payload"].iloc[0] - file = _decode_b64(payload) - json_file = json.loads(file) - - new_file = self.__set_annotation( - json_file, name=annotation_name, value=annotation_value - ) - new_payload = _conv_b64(new_file) + page_list = self.__resolve_page_list(page) - self._update_single_file(file_name=file_path, new_payload=new_payload) + # Define the color dictionary based on color_value type + if isinstance(color_value, int): + color_expr = { + "ThemeDataColor": { + "ColorId": color_value, + "Percent": theme_color_percent, + } + } + elif isinstance(color_value, str) and color_value.startswith("#"): + color_expr = {"Literal": {"Value": f"'{color_value}'"}} + else: + raise NotImplementedError( + f"{icons.red_dot} The color value '{color_value}' is not supported. Please provide a hex color code or an integer based on the color theme." + ) - # Remove Annotations - def __remove_annotation(self, json_file: dict, name: str) -> dict: + color_dict = ({"solid": {"color": {"expr": color_expr}}},) + transparency_dict = {"expr": {"Literal": {"Value": f"{transparency}D"}}} - if "annotations" in json_file: - json_file["annotations"] = [ - annotation - for annotation in json_file["annotations"] - if annotation["name"] != name - ] + for p in self.__all_pages(): + path = p.get("path") + payload = p.get("payload", {}) + page_name = payload.get("name") - return json_file + if page_name in page_list: + self.set_json( + file_path=path, + json_path="$.objects.outspace[*].properties.color", + json_value=color_dict, + ) + self.set_json( + file_path=path, + json_path="$.objects.outspace[*].properties.transparency", + json_value=transparency_dict, + ) - def _remove_annotation( + def _set_wallpaper_image( self, - annotation_name: str, - page_name: Optional[str] = None, - visual_name: Optional[str] = None, + image_path: str, + page: Optional[str | List[str]] = None, + transparency: int = 0, + image_fit: Literal["Normal", "Fit", "Fill"] = "Normal", ): """ - Removes an annotation on the report/page/visual. - In order to remove a report annotation, leave page_name=None, visual_name=None. - In order to remove a page annotation, leave visual_annotation=None. - In order to remove a visual annotation, set all parameters. + Add an image as the wallpaper of a page. Parameters ---------- - annotation_name : str - Name of the annotation. - page_name : str, default=None - The page name or page display name. - Set this annotation when setting an annotation on a page or visual. - visual_name : str, default=None - The visual name. - Set this property when setting an annotation on a visual. - """ - - if page_name is None and visual_name is None: - file_path = "definition/report.json" - elif page_name is not None and visual_name is None: - page_id, page_display, file_path = helper.resolve_page_name( - self, page_name=page_name - ) - elif page_name is not None and visual_name is not None: - page_name, page_display_name, visual_name, file_path = ( - helper.resolve_visual_name( - self, page_name=page_name, visual_name=visual_name - ) - ) - else: + image_path : str + The path of the image file to be added. For example: "./builtin/MyImage.png". + page : str | List[str], default=None + The name or display name of the page(s) to which the wallpaper image will be applied. + If None, applies to all pages. 
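# Illustrative sketch for the wallpaper color helpers above. Placeholder names;
# connect_report import path assumed.
from sempy_labs.report import connect_report  # assumed export location

with connect_report(report="Sales", workspace="Analytics", readonly=False) as rpt:
    # Hex literal with 20% transparency on one page
    rpt._set_wallpaper_color(color_value="#FF5733", page="Overview", transparency=20)
    # Theme color id 3, shifted by 20%, on every page
    rpt._set_wallpaper_color(color_value=3, theme_color_percent=0.2)
    # Remove the wallpaper again from all pages
    rpt._remove_wallpaper()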
+ transparency : int, default=0 + The transparency level of the wallpaper image. Valid values are between 0 and 100. + image_fit : str, default="Normal" + The fit type of the wallpaper image. Valid options: "Normal", "Fit", "Fill". + """ + self._ensure_pbir() + + image_fits = ["Normal", "Fit", "Fill"] + image_fit = image_fit.capitalize() + if image_fit not in image_fits: raise ValueError( - f"{icons.red_dot} Invalid parameters. If specifying a visual_name you must specify the page_name." + f"{icons.red_dot} Invalid image fit. Valid options: {image_fits}." ) + if transparency < 0 or transparency > 100: + raise ValueError(f"{icons.red_dot} Transparency must be between 0 and 100.") + + page_list = self.__resolve_page_list(page) + + image_name = os.path.splitext(os.path.basename(image_path))[0] + image_file_path = self._add_image(image_path=image_path, image_name=image_name) + + image_dict = { + "image": { + "name": {"expr": {"Literal": {"Value": f"'{image_file_path}'"}}}, + "url": { + "expr": { + "ResourcePackageItem": { + "PackageName": "RegisteredResources", + "PackageType": 1, + "ItemName": image_file_path, + } + } + }, + "scaling": {"expr": {"Literal": {"Value": f"'{image_fit}'"}}}, + } + } + transparency_dict = {"expr": {"Literal": {"Value": f"{transparency}D"}}} + + for p in self.__all_pages(): + path = p.get("path") + payload = p.get("payload") + page_name = payload.get("name") + if page_name in page_list: + self.set_json( + file_path=path, + json_path="$.objects.outspace[*].properties.image", + json_value=image_dict, + ) + self.set_json( + file_path=path, + json_path="$.objects.outspace[*].properties.transparency", + json_value=transparency_dict, + ) - payload = self.rdef[self.rdef["path"] == file_path]["payload"].iloc[0] - file = _decode_b64(payload) - json_file = json.loads(file) + def _add_blank_page( + self, + name: str, + width: int = 1280, + height: int = 720, + display_option: str = "FitToPage", + ): + self._ensure_pbir() + + page_id = generate_hex() + payload = { + "$schema": "https://developer.microsoft.com/json-schemas/fabric/item/report/definition/page/1.4.0/schema.json", + "name": page_id, + "displayName": name, + "displayOption": display_option, + "height": height, + "width": width, + } + self.add(file_path=f"definition/pages/{page_id}/page.json", payload=payload) - new_file = self.__remove_annotation(json_file, name=annotation_name) - new_payload = _conv_b64(new_file) + # Add the page to the pages.json file + pages_file = self.get(file_path=self._pages_file_path) + pages_file["pageOrder"].append(page_id) - self._update_single_file(file_name=file_path, new_payload=new_payload) + def _add_page(self, payload: dict | bytes, generate_id: bool = True): + """ + Add a new page to the report. - # Get Annotation Value - def __get_annotation_value(self, json_file: dict, name: str) -> str: + Parameters + ---------- + payload : dict | bytes + The json content of the page to be added. This can be a dictionary or a base64 encoded string. + generate_id : bool, default=True + Whether to generate a new page ID. If False, the page ID will be taken from the payload. 
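# Illustrative sketch for the wallpaper-image and blank-page helpers above.
# Placeholder names and file paths; connect_report import path assumed.
from sempy_labs.report import connect_report  # assumed export location

with connect_report(report="Sales", workspace="Analytics", readonly=False) as rpt:
    rpt._set_wallpaper_image(image_path="./builtin/Background.png", image_fit="Fill")
    rpt._add_blank_page(name="Scratch", width=1280, height=720)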
+ """ + self._ensure_pbir() - if "annotations" in json_file: - for ann in json_file["annotations"]: - if ann.get("name") == name: - return ann.get("value") + page_file = decode_payload(payload) + page_file_copy = copy.deepcopy(page_file) - def _get_annotation_value( - self, - annotation_name: str, - page_name: Optional[str] = None, - visual_name: Optional[str] = None, - ) -> str: + if generate_id: + # Generate a new page ID and update the page file accordingly + page_id = generate_hex() + page_file_copy["name"] = page_id + else: + page_id = page_file_copy.get("name") + + self.add( + file_path=f"definition/pages/{page_id}/page.json", payload=page_file_copy + ) + + def _add_visual(self, page: str, payload: dict | bytes, generate_id: bool = True): """ - Retrieves the annotation value of an annotation on the report/page/visual. - In order to retrieve a report annotation value, leave page_name=None, visual_name=None. - In order to retrieve a page annotation value, leave visual_annotation=None. - In order to retrieve a visual annotation value, set all parameters. + Add a new visual to a page in the report. Parameters ---------- - annotation_name : str - Name of the annotation. - page_name : str, default=None - The page name or page display name. - Set this annotation when setting an annotation on a page or visual. - visual_name : str, default=None - The visual name. - Set this property when setting an annotation on a visual. + page : str + The name or display name of the page to which the visual will be added. + payload : dict | bytes + The json content of the visual to be added. This can be a dictionary or a base64 encoded string. + generate_id : bool, default=True + Whether to generate a new visual ID. If False, the visual ID will be taken from the payload. + """ + self._ensure_pbir() + + visual_file = decode_payload(payload) + visual_file_copy = copy.deepcopy(visual_file) + + if generate_id: + # Generate a new visual ID and update the visual file accordingly + visual_id = generate_hex() + visual_file_copy["name"] = visual_id + else: + visual_id = visual_file_copy.get("name") + (page_file_path, page_id, page_name) = ( + self.__resolve_page_name_and_display_name_file_path(page) + ) + visual_file_path = helper.generate_visual_file_path(page_file_path, visual_id) - Returns - ------- - str - The annotation value. + self.add(file_path=visual_file_path, payload=visual_file_copy) + + def _add_new_visual( + self, + page: str, + type: str, + x: int, + y: int, + height: int = 720, + width: int = 1280, + ): + self._ensure_pbir() + + type = helper.resolve_visual_type(type) + visual_id = generate_hex() + (page_file_path, page_id, page_name) = ( + self.__resolve_page_name_and_display_name_file_path(page) + ) + visual_file_path = helper.generate_visual_file_path(page_file_path, visual_id) + + payload = { + "$schema": "https://developer.microsoft.com/json-schemas/fabric/item/report/definition/visualContainer/2.0.0/schema.json", + "name": visual_id, + "position": { + "x": x, + "y": y, + "z": 0, + "height": height, + "width": width, + "tabOrder": 0, + }, + "visual": {"visualType": type, "drillFilterOtherVisuals": True}, + } + + self.add(file_path=visual_file_path, payload=payload) + + def _update_to_theme_colors(self, mapping: dict[str, tuple[int, float]]): """ + Updates the report definition to use theme colors instead of hex colors. 
- if page_name is None and visual_name is None: - file_path = "definition/report.json" - elif page_name is not None and visual_name is None: - page_id, page_display, file_path = helper.resolve_page_name( - self, page_name=page_name - ) - elif page_name is not None and visual_name is not None: - page_name, page_display_name, visual_name, file_path = ( - helper.resolve_visual_name( - self, page_name=page_name, visual_name=visual_name - ) - ) - else: - raise ValueError( - f"{icons.red_dot} Invalid parameters. If specifying a visual_name you must specify the page_name." - ) + Parameters + ---------- + mapping : dict[str, tuple[int, float] + A dictionary mapping color names to their corresponding theme color IDs. + Example: {"#FF0000": (1, 0), "#00FF00": (2, 0)} + The first value in the tuple is the theme color ID and the second value is the percentage (a value between -0.6 and 0.6). + """ + self._ensure_pbir() - payload = self.rdef[self.rdef["path"] == file_path]["payload"].iloc[0] - file = _decode_b64(payload) - json_file = json.loads(file) - - return self.__get_annotation_value(json_file, name=annotation_name) - - def __adjust_settings( - self, setting_type: str, setting_name: str, setting_value: bool - ): # Meta function - - valid_setting_types = ["settings", "slowDataSourceSettings"] - valid_settings = [ - "isPersistentUserStateDisabled", - "hideVisualContainerHeader", - "defaultFilterActionIsDataFilter", - "useStylableVisualContainerHeader", - "useDefaultAggregateDisplayName", - "useEnhancedTooltips", - "allowChangeFilterTypes", - "disableFilterPaneSearch", - "useCrossReportDrillthrough", - ] - valid_slow_settings = [ - "isCrossHighlightingDisabled", - "isSlicerSelectionsButtonEnabled", - ] + # Ensure theme color mapping is in the correct format (with Percent value) + mapping = {k: (v, 0) if isinstance(v, int) else v for k, v in mapping.items()} - if setting_type not in valid_setting_types: - raise ValueError( - f"Invalid setting_type. Valid options: {valid_setting_types}." - ) - if setting_type == "settings" and setting_name not in valid_settings: - raise ValueError( - f"The '{setting_name}' is not a valid setting. Valid options: {valid_settings}." + out_of_range = { + color: value + for color, value in mapping.items() + if len(value) > 1 and not (-0.6 <= value[1] <= 0.6) + } + + if out_of_range: + print( + f"{icons.red_dot} The following mapping entries have Percent values out of range [-0.6, 0.6]:" ) - if ( - setting_type == "slowDataSourceSettings" - and setting_name not in valid_slow_settings - ): + for color, val in out_of_range.items(): + print(f" {color}: Percent = {val[1]}") raise ValueError( - f"The '{setting_name}' is not a valid setting. Valid options: {valid_slow_settings}." + f"{icons.red_dot} The Percent values must be between -0.6 and 0.6." 
) - request_body = {"definition": {"parts": []}} - - rd = self.rdef - for _, r in rd.iterrows(): - path = r["path"] - payload = r["payload"] - if path == "definition/report.json": - obj_file = base64.b64decode(payload).decode("utf-8") - obj_json = json.loads(obj_file) - if setting_value is False: - if setting_name in obj_json.get(setting_type, {}): - del obj_json[setting_type][setting_name] - else: - if setting_name not in obj_json.get(setting_type, {}): - obj_json[setting_type][setting_name] = True + json_path = "$..color.expr.Literal.Value" + jsonpath_expr = parse(json_path) + + for part in [ + part + for part in self._report_definition.get("parts") + if part.get("path").endswith(".json") + ]: + file_path = part.get("path") + payload = part.get("payload") + matches = jsonpath_expr.find(payload) + if matches: + for match in matches: + color_string = match.value.strip("'") + if color_string in mapping: + color_data = mapping[color_string] + if isinstance(color_data, int): + color_data = [color_data, 0] + + # Get reference to parent of 'Value' (i.e. 'Literal') + # literal_dict = match.context.value + # Get reference to parent of 'Literal' (i.e. 'expr') + expr_dict = match.context.context.value + + # Replace the 'expr' with new structure + expr_dict.clear() + expr_dict["ThemeDataColor"] = { + "ColorId": color_data[0], + "Percent": color_data[1], + } - _add_part(request_body, path, _conv_b64(obj_json)) - else: - _add_part(request_body, path, payload) + self.update(file_path=file_path, payload=payload) - upd = self.update_report(request_body=request_body) - if upd == 200: - print(f"{icons.green_dot}") - else: - print(f"{icons.red_dot}") - - def __persist_filters(self, value: Optional[bool] = False): + def _rename_fields(self, mapping: dict): """ - Don't allow end user to save filters on this file in the Power BI service. + Renames fields in the report definition based on the provided rename mapping. + + Parameters + ---------- + mapping : dict + A dictionary containing the mapping of old field names to new field names. + Example: + + { + "columns": { + ("TableName", "OldColumnName1"): "NewColumnName1", + ("TableName", "OldColumnName2"): "NewColumnName2", + }, + "measures": { + ("TableName", "OldMeasureName1"): "NewMeasureName1", + ("TableName", "OldMeasureName2"): "NewMeasureName2", + } + } """ + self._ensure_pbir() - self.adjust_settings( - setting_type="settings", - setting_name="isPersistentUserStateDisabled", - setting_value=value, - ) + selector_mapping = { + key: { + ".".join(k): v # join tuple with '.' to form the string + for k, v in value.items() + } + for key, value in mapping.items() + } - def __hide_visual_header(self, value: Optional[bool] = False): - """ - Hide the visual header in reading view. 
- """ + for part in [ + part + for part in self._report_definition.get("parts") + if part.get("path").endswith(".json") + ]: + file_path = part.get("path") + payload = part.get("payload") + + # Paths for columns, measures, and expressions + col_expr_path = parse("$..Column") + meas_expr_path = parse("$..Measure") + entity_ref_path = parse("$..Expression.SourceRef.Entity") + query_ref_path = parse("$..queryRef") + native_query_ref_path = parse("$..nativeQueryRef") + filter_expr_path = parse("$..filterConfig.filters[*].filter.From") + source_ref_path = parse("$..Expression.SourceRef.Source") + metadata_ref_path = parse("$..selector.metadata") + + # Populate table alias map + alias_map = {} + for match in filter_expr_path.find(payload): + alias_list = match.value + for alias in alias_list: + alias_name = alias.get("Name") + alias_entity = alias.get("Entity") + alias_map[alias_name] = alias_entity + + # Rename selector.metadata objects + for match in metadata_ref_path.find(payload): + obj = match.value + + # Check both measures and columns + for category in ["measures", "columns"]: + if obj in selector_mapping.get(category, {}): + value = selector_mapping[category][obj] + + # Find original tuple key from mapping for this category + for tup_key in mapping.get(category, {}).keys(): + if ".".join(tup_key) == obj: + key = tup_key[ + 0 + ] # first element of tuple, like table name + new_value = f"{key}.{value}" + + # Update the dictionary node holding "metadata" + if isinstance(match.context.value, dict): + match.context.value["metadata"] = new_value + else: + print( + f"Warning: Cannot assign metadata, context is {type(match.context.value)}" + ) + break + + # Once found in one category, no need to check the other + break - self.adjust_settings( - setting_type="settings", - setting_name="hideVisualContainerHeader", - setting_value=value, - ) + # Rename Column Properties + for match in col_expr_path.find(payload): + col_obj = match.value + parent = match.context.value - def __default_cross_filtering(self, value: Optional[bool] = False): - """ - Change the default visual interaction from cross highlighting to cross filtering. - """ + # Extract table name from SourceRef + source_matches = entity_ref_path.find(parent) + if source_matches: + table = source_matches[0].value + else: + alias = source_ref_path.find(parent) + table = alias_map.get(alias[0].value) - self.adjust_settings( - setting_type="settings", - setting_name="defaultFilterActionIsDataFilter", - setting_value=value, - ) + if not table: + continue # skip if can't resolve table - def __modern_visual_header(self, value: Optional[bool] = True): - """ - Use the modern visual header with updated styling options. - """ + old_name = col_obj.get("Property") + if (table, old_name) in mapping.get("columns", {}): + col_obj["Property"] = mapping["columns"][(table, old_name)] - self.adjust_settings( - setting_type="settings", - setting_name="useStylableVisualContainerHeader", - setting_value=value, - ) + # Rename Measure Properties + for match in meas_expr_path.find(payload): + meas_obj = match.value + parent = match.context.value - def __show_default_summarization_type(self, value: Optional[bool] = True): - """ - For aggregated fields, always show the default summarization type. 
+ source_matches = entity_ref_path.find(parent) + if source_matches: + table = source_matches[0].value + else: + alias = source_ref_path.find(parent) + table = alias_map.get(alias[0].value) + + if not table: + continue # skip if can't resolve table + + old_name = meas_obj.get("Property") + if (table, old_name) in mapping.get("measures", {}): + meas_obj["Property"] = mapping["measures"][(table, old_name)] + + # Update queryRef and nativeQueryRef + def update_refs(path_expr): + for match in path_expr.find(payload): + ref_key = match.path.fields[0] + ref_value = match.value + parent = match.context.value + + for (tbl, old_name), new_name in mapping.get("columns", {}).items(): + pattern = rf"\b{re.escape(tbl)}\.{re.escape(old_name)}\b" + if re.search(pattern, ref_value): + if ref_key == "queryRef": + ref_value = re.sub( + pattern, f"{tbl}.{new_name}", ref_value + ) + elif ref_key == "nativeQueryRef": + agg_match = re.match( + rf"(?i)([a-z]+)\s*\(\s*{re.escape(tbl)}\.{re.escape(old_name)}\s*\)", + ref_value, + ) + if agg_match: + func = agg_match.group(1).capitalize() + ref_value = f"{func} of {new_name}" + else: + ref_value = ref_value.replace(old_name, new_name) + parent[ref_key] = ref_value + + for (tbl, old_name), new_name in mapping.get( + "measures", {} + ).items(): + pattern = rf"\b{re.escape(tbl)}\.{re.escape(old_name)}\b" + if re.search(pattern, ref_value): + if ref_key == "queryRef": + ref_value = re.sub( + pattern, f"{tbl}.{new_name}", ref_value + ) + elif ref_key == "nativeQueryRef": + agg_match = re.match( + rf"(?i)([a-z]+)\s*\(\s*{re.escape(tbl)}\.{re.escape(old_name)}\s*\)", + ref_value, + ) + if agg_match: + func = agg_match.group(1).capitalize() + ref_value = f"{func} of {new_name}" + else: + ref_value = ref_value.replace(old_name, new_name) + parent[ref_key] = ref_value + + update_refs(query_ref_path) + update_refs(native_query_ref_path) + + self.update(file_path=file_path, payload=payload) + + def _list_color_codes(self) -> List[str]: + """ + Shows a list of all the hex color codes used in the report. + + Returns + ------- + list[str] + A list of hex color codes used in the report. """ + self._ensure_pbir() - self.adjust_settings( - setting_type="settings", - setting_name="useDefaultAggregateDisplayName", - setting_value=value, - ) + file = self.get("*.json", json_path="$..color.expr.Literal.Value") + + return [x[1].strip("'") for x in file] - def __modern_visual_tooltips(self, value: Optional[bool] = True): + def __update_visual_image(self, file_path: str, image_path: str): """ - Use modern visual tooltips with drill actions and updated styling. + Update the image of a visual in the report definition. Only supported for 'image' visual types. + + Parameters + ---------- + file_path : str + The file path of the visual to be updated. For example: "definition/pages/ReportSection1/visuals/a1d8f99b81dcc2d59035/visual.json". + image_path : str + The name of the image file to be added. For example: "MyImage". """ - self.adjust_settings( - setting_type="settings", - setting_name="useEnhancedTooltips", - setting_value=value, - ) + if image_path not in self.list_paths().get("Path").values: + raise ValueError( + f"Image path '{image_path}' not found in the report definition." + ) + if not image_path.startswith("StaticResources/RegisteredResources/"): + raise ValueError( + f"Image path must start with 'StaticResources/RegisteredResources/'. Provided: {image_path}" + ) - def __user_can_change_filter_types(self, value: Optional[bool] = True): - """ - Allow users to change filter types. 
- """ + image_name = image_path.split("RegisteredResources/")[1] - self.adjust_settings( - setting_type="settings", - setting_name="allowChangeFilterTypes", - setting_value=value, - ) + if not file_path.endswith("/visual.json"): + raise ValueError( + f"File path must end with '/visual.json'. Provided: {file_path}" + ) - def __disable_search_filter_pane(self, value: Optional[bool] = False): - """ - Enable search for the filter pane. - """ + file = self.get(file_path=file_path) + if file.get("visual").get("visualType") != "image": + raise ValueError("This function is only valid for image visuals.") + file.get("visual").get("objects").get("general")[0].get("properties").get( + "imageUrl" + ).get("expr").get("ResourcePackageItem")["ItemName"] == image_name - self.adjust_settings( - setting_type="settings", - setting_name="disableFilterPaneSearch", - setting_value=value, - ) + def save_changes(self): - def __enable_cross_report_drillthrough(self, value: Optional[bool] = False): - """ - Allow visuals in this report to use drillthrough targets from other reports. - """ + if self._readonly: + print( + f"{icons.warning} The connection is read-only. Set 'readonly' to False to save changes." + ) + else: + # Convert the report definition to base64 + if self._current_report_definition == self._report_definition: + print(f"{icons.info} No changes were made to the report definition.") + return + new_report_definition = copy.deepcopy(self._report_definition) + + for part in new_report_definition.get("parts"): + part["payloadType"] = "InlineBase64" + path = part.get("path") + payload = part.get("payload") + if isinstance(payload, dict): + converted_json = json.dumps(part["payload"]) + part["payload"] = base64.b64encode( + converted_json.encode("utf-8") + ).decode("utf-8") + elif isinstance(payload, bytes): + part["payload"] = base64.b64encode(part["payload"]).decode("utf-8") + elif is_base64(payload): + part["payload"] = payload + else: + raise NotImplementedError( + f"{icons.red_dot} Unsupported payload type: {type(payload)} for the '{path}' file." + ) - self.adjust_settings( - setting_type="settings", - setting_name="useCrossReportDrillthrough", - setting_value=value, - ) + # Generate payload for the updateDefinition API + new_payload = {"definition": {"parts": new_report_definition.get("parts")}} - def __disable_default_cross_highlighting(self, value: Optional[bool] = False): - """ - Disable cross highlighting/filtering by default. - """ + # Update item definition + _base_api( + request=f"/v1/workspaces/{self._workspace_id}/reports/{self._report_id}/updateDefinition", + method="post", + payload=new_payload, + lro_return_status_code=True, + status_codes=None, + ) + print( + f"{icons.green_dot} The report definition has been updated successfully." + ) - self.adjust_settings( - setting_type="slowDataSourceSettings", - setting_name="isCrossHighlightingDisabled", - setting_value=value, - ) + def close(self): - def __add_slicer_apply_button(self, value: Optional[bool] = False): - """ - Add an Apply button to each individual slicer (not recommended). 
- """ + if self._show_diffs and ( + self._current_report_definition != self._report_definition + ): + diff_parts( + self._current_report_definition.get("parts"), + self._report_definition.get("parts"), + ) + # Save the changes to the service if the connection is read/write + if not self._readonly: + self.save_changes() + + +@log +@contextmanager +def connect_report( + report: str | UUID, + workspace: Optional[str | UUID] = None, + readonly: bool = True, + show_diffs: bool = True, +): + """ + Connects to the report. - self.adjust_settings( - setting_type="slowDataSourceSettings", - setting_name="isSlicerSelectionsButtonEnabled", - setting_value=value, - ) + Parameters + ---------- + report : str | uuid.UUID + Name or ID of the report. + workspace : str | uuid.UUID, default=None + The workspace name or ID. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + readonly: bool, default=True + Whether the connection is read-only or read/write. Setting this to False enables read/write which saves the changes made back to the server. + show_diffs: bool, default=True + Whether to show the differences between the current report definition in the service and the new report definition. - # def close(self): - # if not self._readonly and self._report is not None: - # print("saving...") + Returns + ------- + typing.Iterator[ReportWrapper] + A connection to the report's metadata. + """ - # self._report = None + rw = ReportWrapper( + report=report, + workspace=workspace, + readonly=readonly, + show_diffs=show_diffs, + ) + try: + yield rw + finally: + rw.close() diff --git a/src/sempy_labs/report/_save_report.py b/src/sempy_labs/report/_save_report.py new file mode 100644 index 00000000..ddd14fec --- /dev/null +++ b/src/sempy_labs/report/_save_report.py @@ -0,0 +1,147 @@ +import os +import base64 +import json +import sempy.fabric as fabric +import sempy_labs._icons as icons +from sempy_labs.report._generate_report import get_report_definition +from sempy_labs._generate_semantic_model import get_semantic_model_definition +from sempy_labs._helper_functions import ( + _mount, + resolve_workspace_name_and_id, + resolve_item_name, + resolve_workspace_name, + resolve_item_name_and_id, +) +from uuid import UUID +from sempy._utils._log import log +from typing import Optional + + +@log +def save_report_as_pbip( + report: str | UUID, + workspace: Optional[str | UUID] = None, + thick_report: bool = True, + live_connect: bool = True, + lakehouse: Optional[str | UUID] = None, + lakehouse_workspace: Optional[str | UUID] = None, +): + """ + Saves a report as a .pbip file to the default lakehouse attached to the notebook. + + Parameters + ---------- + report : str | uuid.UUID + Name or ID of the Power BI report. + workspace : str | uuid.UUID, default=None + The name or ID of the Fabric workspace in which the report resides. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + thick_report : bool, default=True + If set to True, saves the report and underlying semantic model. + If set to False, saves just the report. + live_connect : bool, default=True + If set to True, saves a .pbip live-connected to the workspace in the Power BI / Fabric service. + If set to False, saves a .pbip with a local model, independent from the Power BI / Fabric service. + lakehouse : str | uuid.UUID, default=None + The Fabric lakehouse name or ID. 
This will be the lakehouse to which the report is saved. + Defaults to None which resolves to the lakehouse attached to the notebook. + lakehouse_workspace : str | uuid.UUID, default=None + The Fabric workspace name or ID used by the lakehouse. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + """ + + (report_workspace_name, report_workspace_id) = resolve_workspace_name_and_id( + workspace + ) + (report_name, report_id) = resolve_item_name_and_id( + item=report, type="Report", workspace=workspace + ) + indent = 2 + + local_path = _mount(lakehouse=lakehouse, workspace=lakehouse_workspace) + save_location = f"{local_path}/Files" + + # Find semantic model info + dfR = fabric.list_reports(workspace=workspace) + dfR_filt = dfR[dfR["Id"] == report_id] + if dfR_filt.empty: + raise ValueError( + f"{icons.red_dot} The '{report} report does not exist within the '{report_workspace_name} workspace." + ) + + dataset_id = dfR_filt["Dataset Id"].iloc[0] + dataset_workspace_id = dfR_filt["Dataset Workspace Id"].iloc[0] + dataset_name = resolve_item_name(item_id=dataset_id, workspace=dataset_workspace_id) + dataset_workspace_name = resolve_workspace_name(dataset_workspace_id) + path_prefix = f"{save_location}/{report_workspace_name}/{report_name}/{report_name}" + + # Local model not supported if the report and model are in different workspaces + if dataset_workspace_name != report_workspace_name and not live_connect: + live_connect = True + print( + f"{icons.warning} The '{report_name}' report from the '{report_workspace_name}' workspace is being saved as a live-connected report/model." + ) + + def add_files(name, type, object_workspace): + + path_prefix_full = f"{path_prefix}.{type}" + + if type == "Report": + dataframe = get_report_definition(report=name, workspace=workspace) + elif type == "SemanticModel": + dataframe = get_semantic_model_definition( + dataset=name, workspace=object_workspace + ) + else: + raise NotImplementedError + + # Create and save files based on dataset/report definition + for _, r in dataframe.iterrows(): + path = r["path"] + file_content = base64.b64decode(r["payload"]) + file_path = f"{path_prefix_full}/{path}" + os.makedirs(os.path.dirname(file_path), exist_ok=True) + + # Update the definition.pbir file for local models + if not live_connect and type == "Report" and path == "definition.pbir": + file_content = { + "version": "1.0", + "datasetReference": { + "byPath": {"path": f"../{report_name}.SemanticModel"}, + "byConnection": None, + }, + } + + with open(file_path, "w") as f: + json.dump(file_content, f, indent=indent) + else: + with open(file_path, "wb") as f: + f.write(file_content) + + # Create and save .pbip file for report, converting the file extension + if type == "Report": + # Standard .pbip file content + pbip = { + "version": "1.0", + "artifacts": [{"report": {"path": f"{report_name}.Report"}}], + "settings": {"enableAutoRecovery": True}, + } + # Ensure the directory exists + os.makedirs(os.path.dirname(path_prefix), exist_ok=True) + # Write the .pbip file directly + pbip_final = f"{path_prefix}.pbip" + with open(pbip_final, "w") as file: + json.dump(pbip, file, indent=indent) + + add_files(name=report_name, type="Report", object_workspace=workspace) + if thick_report: + add_files( + name=dataset_name, + type="SemanticModel", + object_workspace=dataset_workspace_name, + ) + print( + f"{icons.green_dot} The '{report_name}' report within the 
'{report_workspace_name}' workspace has been saved to this location: {save_location}." + ) diff --git a/src/sempy_labs/tom/_model.py b/src/sempy_labs/tom/_model.py index 64b42b9d..e94f248a 100644 --- a/src/sempy_labs/tom/_model.py +++ b/src/sempy_labs/tom/_model.py @@ -2,6 +2,7 @@ import sempy.fabric as fabric import pandas as pd import re +import json from datetime import datetime from sempy_labs._helper_functions import ( format_dax_object_name, @@ -10,6 +11,9 @@ resolve_dataset_name_and_id, resolve_workspace_name_and_id, _base_api, + resolve_workspace_id, + resolve_item_id, + resolve_lakehouse_id, ) from sempy_labs._list_functions import list_relationships from sempy_labs._refresh_semantic_model import refresh_semantic_model @@ -43,6 +47,13 @@ class TOMWrapper: _tables_added: List[str] _table_map = dict _column_map = dict + _dax_formatting = { + "measures": [], + "calculated_columns": [], + "calculated_tables": [], + "calculation_items": [], + "rls": [], + } def __init__(self, dataset, workspace, readonly): @@ -83,7 +94,7 @@ def __init__(self, dataset, workspace, readonly): # No token provider (standard authentication) if self._token_provider is None: self._tom_server = fabric.create_tom_server( - readonly=readonly, workspace=workspace_id + dataset=dataset, readonly=readonly, workspace=workspace_id ) # Service Principal Authentication for Azure AS via token provider elif self._is_azure_as: @@ -798,23 +809,27 @@ def set_ols( if permission not in ["Read", "None", "Default"]: raise ValueError(f"{icons.red_dot} Invalid 'permission' value.") - cp = TOM.ColumnPermission() - cp.Column = self.model.Tables[table_name].Columns[column_name] - cp.MetadataPermission = System.Enum.Parse(TOM.MetadataPermission, permission) - - if any( - c.Name == column_name and t.Name == table_name and r.Name == role_name - for r in self.model.Roles - for t in r.TablePermissions - for c in t.ColumnPermissions - ): - self.model.Roles[role_name].TablePermissions[table_name].ColumnPermissions[ + r = self.model.Roles[role_name] + tables = [t.Name for t in r.TablePermissions] + # Add table permission if it does not exist + if table_name not in tables: + tp = TOM.TablePermission() + tp.Table = self.model.Tables[table_name] + r.TablePermissions.Add(tp) + columns = [c.Name for c in r.TablePermissions[table_name].ColumnPermissions] + # Add column permission if it does not exist + if column_name not in columns: + cp = TOM.ColumnPermission() + cp.Column = self.model.Tables[table_name].Columns[column_name] + cp.MetadataPermission = System.Enum.Parse( + TOM.MetadataPermission, permission + ) + r.TablePermissions[table_name].ColumnPermissions.Add(cp) + # Set column permission if it already exists + else: + r.TablePermissions[table_name].ColumnPermissions[ column_name ].MetadataPermission = System.Enum.Parse(TOM.MetadataPermission, permission) - else: - self.model.Roles[role_name].TablePermissions[ - table_name - ].ColumnPermissions.Add(cp) def add_hierarchy( self, @@ -934,19 +949,23 @@ def add_relationship( import Microsoft.AnalysisServices.Tabular as TOM import System - if cross_filtering_behavior is None: + if not cross_filtering_behavior: cross_filtering_behavior = "Automatic" - if security_filtering_behavior is None: + if not security_filtering_behavior: security_filtering_behavior = "OneDirection" - from_cardinality = from_cardinality.capitalize() - to_cardinality = to_cardinality.capitalize() - cross_filtering_behavior = cross_filtering_behavior.capitalize() - security_filtering_behavior = 
security_filtering_behavior.capitalize() + for var_name in [ + "from_cardinality", + "to_cardinality", + "cross_filtering_behavior", + "security_filtering_behavior", + ]: + locals()[var_name] = locals()[var_name].capitalize() + + cross_filtering_behavior = cross_filtering_behavior.replace("direct", "Direct") security_filtering_behavior = security_filtering_behavior.replace( "direct", "Direct" ) - cross_filtering_behavior = cross_filtering_behavior.replace("direct", "Direct") rel = TOM.SingleColumnRelationship() rel.FromColumn = self.model.Tables[from_table].Columns[from_column] @@ -958,13 +977,16 @@ def add_relationship( TOM.RelationshipEndCardinality, to_cardinality ) rel.IsActive = is_active - rel.CrossFilteringBehavior = System.Enum.Parse( - TOM.CrossFilteringBehavior, cross_filtering_behavior - ) - rel.SecurityFilteringBehavior = System.Enum.Parse( - TOM.SecurityFilteringBehavior, security_filtering_behavior - ) - rel.RelyOnReferentialIntegrity = rely_on_referential_integrity + if cross_filtering_behavior != "Automatic": + rel.CrossFilteringBehavior = System.Enum.Parse( + TOM.CrossFilteringBehavior, cross_filtering_behavior + ) + if security_filtering_behavior != "OneDirection": + rel.SecurityFilteringBehavior = System.Enum.Parse( + TOM.SecurityFilteringBehavior, security_filtering_behavior + ) + if rely_on_referential_integrity: + rel.RelyOnReferentialIntegrity = True self.model.Relationships.Add(rel) @@ -1171,8 +1193,8 @@ def add_entity_partition( Name of the table. entity_name : str Name of the lakehouse/warehouse table. - expression : TOM Object, default=None - The expression used by the table. + expression : str, default=None + The name of the expression used by the partition. Defaults to None which resolves to the 'DatabaseQuery' expression. description : str, default=None A description for the partition. @@ -1542,6 +1564,7 @@ def add_to_perspective( self, object: Union["TOM.Table", "TOM.Column", "TOM.Measure", "TOM.Hierarchy"], perspective_name: str, + include_all: bool = True, ): """ Adds an object to a `perspective `_. @@ -1552,6 +1575,8 @@ def add_to_perspective( An object (i.e. table/column/measure) within a semantic model. perspective_name : str Name of the perspective. + include_all : bool, default=True + Relevant to tables only, if set to True, includes all columns, measures, and hierarchies within that table in the perspective. 
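# Illustrative TOM sketch for the relationship and perspective changes above;
# connect_semantic_model is imported as elsewhere in this patch, while
# dataset/workspace/table names are placeholders.
from sempy_labs.tom import connect_semantic_model

with connect_semantic_model(dataset="Sales Model", workspace="Analytics", readonly=False) as tom:
    tom.add_relationship(
        from_table="Sales", from_column="ProductKey",
        to_table="Product", to_column="ProductKey",
        from_cardinality="Many", to_cardinality="One",
    )
    # include_all=True also flags the table's columns, measures and hierarchies
    tom.add_to_perspective(
        object=tom.model.Tables["Sales"], perspective_name="Finance", include_all=True
    )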
""" import Microsoft.AnalysisServices.Tabular as TOM @@ -1577,6 +1602,8 @@ def add_to_perspective( if objectType == TOM.ObjectType.Table: pt = TOM.PerspectiveTable() + if include_all: + pt.IncludeAll = True pt.Table = object object.Model.Perspectives[perspective_name].PerspectiveTables.Add(pt) elif objectType == TOM.ObjectType.Column: @@ -2251,7 +2278,7 @@ def mark_as_date_table( if validate: dax_query = f""" - define measure '{table_name}'[test] = + define measure '{table_name}'[test] = var mn = MIN('{table_name}'[{column_name}]) var ma = MAX('{table_name}'[{column_name}]) var x = COUNTROWS(DISTINCT('{table_name}'[{column_name}])) @@ -3274,17 +3301,28 @@ def depends_on(self, object, dependencies: pd.DataFrame): """ import Microsoft.AnalysisServices.Tabular as TOM - objType = object.ObjectType - objName = object.Name - objParentName = object.Parent.Name + obj_type = object.ObjectType + obj_name = object.Name - if objType == TOM.ObjectType.Table: - objParentName = objName + if object.ObjectType == TOM.ObjectType.CalculationItem: + obj_parent_name = object.Parent.Table.Name + else: + obj_parent_name = object.Parent.Name + + if obj_type == TOM.ObjectType.Table: + obj_parent_name = obj_name + object_types = ["Table", "Calc Table"] + elif obj_type == TOM.ObjectType.Column: + object_types = ["Column", "Calc Column"] + elif obj_type == TOM.ObjectType.CalculationItem: + object_types = ["Calculation Item"] + else: + object_types = [str(obj_type)] fil = dependencies[ - (dependencies["Object Type"] == str(objType)) - & (dependencies["Table Name"] == objParentName) - & (dependencies["Object Name"] == objName) + (dependencies["Object Type"].isin(object_types)) + & (dependencies["Table Name"] == obj_parent_name) + & (dependencies["Object Name"] == obj_name) ] meas = ( fil[fil["Referenced Object Type"] == "Measure"]["Referenced Object"] @@ -3292,14 +3330,16 @@ def depends_on(self, object, dependencies: pd.DataFrame): .tolist() ) cols = ( - fil[fil["Referenced Object Type"] == "Column"][ + fil[fil["Referenced Object Type"].isin(["Column", "Calc Column"])][ "Referenced Full Object Name" ] .unique() .tolist() ) tbls = ( - fil[fil["Referenced Object Type"] == "Table"]["Referenced Table"] + fil[fil["Referenced Object Type"].isin(["Table", "Calc Table"])][ + "Referenced Table" + ] .unique() .tolist() ) @@ -3364,6 +3404,41 @@ def referenced_by(self, object, dependencies: pd.DataFrame): if t.Name in tbls: yield t + def _get_expression(self, object): + """ + Helper function to get the expression for any given TOM object. + """ + + import Microsoft.AnalysisServices.Tabular as TOM + + valid_objects = [ + TOM.ObjectType.Measure, + TOM.ObjectType.Table, + TOM.ObjectType.Column, + TOM.ObjectType.CalculationItem, + ] + + if object.ObjectType not in valid_objects: + raise ValueError( + f"{icons.red_dot} The 'object' parameter must be one of these types: {valid_objects}." 
+ ) + + if object.ObjectType == TOM.ObjectType.Measure: + expr = object.Expression + elif object.ObjectType == TOM.ObjectType.Table: + part = next(p for p in object.Partitions) + if part.SourceType == TOM.PartitionSourceType.Calculated: + expr = part.Source.Expression + elif object.ObjectType == TOM.ObjectType.Column: + if object.Type == TOM.ColumnType.Calculated: + expr = object.Expression + elif object.ObjectType == TOM.ObjectType.CalculationItem: + expr = object.Expression + else: + return + + return expr + def fully_qualified_measures( self, object: "TOM.Measure", dependencies: pd.DataFrame ): @@ -3388,15 +3463,16 @@ def fully_qualified_measures( dependencies["Object Name"] == dependencies["Parent Node"] ] + expr = self._get_expression(object=object) + for obj in self.depends_on(object=object, dependencies=dependencies): if obj.ObjectType == TOM.ObjectType.Measure: - if (f"{obj.Parent.Name}[{obj.Name}]" in object.Expression) or ( - format_dax_object_name(obj.Parent.Name, obj.Name) - in object.Expression + if (f"{obj.Parent.Name}[{obj.Name}]" in expr) or ( + format_dax_object_name(obj.Parent.Name, obj.Name) in expr ): yield obj - def unqualified_columns(self, object: "TOM.Column", dependencies: pd.DataFrame): + def unqualified_columns(self, object, dependencies: pd.DataFrame): """ Obtains all unqualified column references for a given object. @@ -3418,6 +3494,8 @@ def unqualified_columns(self, object: "TOM.Column", dependencies: pd.DataFrame): dependencies["Object Name"] == dependencies["Parent Node"] ] + expr = self._get_expression(object=object) + def create_pattern(tableList, b): patterns = [ r"(?`_ policy. Parameters ---------- - table_name : str - Name of the table. + object : TOM Object + The TOM object within the semantic model. Accepts either a table or the model object. Returns ------- @@ -3482,13 +3560,21 @@ def has_incremental_refresh_policy(self, table_name: str): An indicator whether a table has an incremental refresh policy. """ - hasRP = False - rp = self.model.Tables[table_name].RefreshPolicy - - if rp is not None: - hasRP = True + import Microsoft.AnalysisServices.Tabular as TOM - return hasRP + if object.ObjectType == TOM.ObjectType.Table: + if object.RefreshPolicy is not None: + return True + else: + return False + elif object.ObjectType == TOM.ObjectType.Model: + rp = False + for t in self.model.Tables: + if t.RefreshPolicy is not None: + rp = True + return rp + else: + raise NotImplementedError def show_incremental_refresh_policy(self, table_name: str): """ @@ -3587,25 +3673,27 @@ def update_incremental_refresh_policy( import Microsoft.AnalysisServices.Tabular as TOM import System - if not self.has_incremental_refresh_policy(table_name=table_name): + if not self.has_incremental_refresh_policy( + object=self.model.Tables[table_name] + ): print( f"The '{table_name}' table does not have an incremental refresh policy." ) return - incGran = ["Day", "Month", "Quarter", "Year"] + granularities = ["Day", "Month", "Quarter", "Year"] incremental_granularity = incremental_granularity.capitalize() rolling_window_granularity = rolling_window_granularity.capitalize() - if incremental_granularity not in incGran: + if incremental_granularity not in granularities: raise ValueError( - f"{icons.red_dot} Invalid 'incremental_granularity' value. Please choose from the following options: {incGran}." + f"{icons.red_dot} Invalid 'incremental_granularity' value. Please choose from the following options: {granularities}." 
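# Illustrative sketch for the reworked has_incremental_refresh_policy, which now
# accepts either a table object or the model object. Placeholder dataset,
# workspace and table names.
from sempy_labs.tom import connect_semantic_model

with connect_semantic_model(dataset="Sales Model", workspace="Analytics") as tom:
    table_has_policy = tom.has_incremental_refresh_policy(object=tom.model.Tables["Sales"])
    model_has_policy = tom.has_incremental_refresh_policy(object=tom.model)  # any table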
) - if rolling_window_granularity not in incGran: + if rolling_window_granularity not in granularities: raise ValueError( - f"{icons.red_dot} Invalid 'rolling_window_granularity' value. Please choose from the following options: {incGran}." + f"{icons.red_dot} Invalid 'rolling_window_granularity' value. Please choose from the following options: {granularities}." ) if rolling_window_periods < 1: @@ -4635,7 +4723,12 @@ def set_value_filter_behavior(self, value_filter_behavior: str = "Automatic"): TOM.ValueFilterBehaviorType, value_filter_behavior ) - def add_role_member(self, role_name: str, member: str | List[str]): + def add_role_member( + self, + role_name: str, + member: str | List[str], + role_member_type: Optional[str] = "User", + ): """ Adds an external model role member (AzureAD) to a role. @@ -4645,13 +4738,23 @@ def add_role_member(self, role_name: str, member: str | List[str]): The role name. member : str | List[str] The email address(es) of the member(s) to add. + role_member_type : str, default="User" + The type of the role member. Default is "User". Other options include "Group" for Azure AD groups. + All members must be of the same role_member_type. """ import Microsoft.AnalysisServices.Tabular as TOM + import System if isinstance(member, str): member = [member] + role_member_type = role_member_type.capitalize() + if role_member_type not in ["User", "Group"]: + raise ValueError( + f"{icons.red_dot} The '{role_member_type}' is not a valid role member type. Valid options: 'User', 'Group'." + ) + role = self.model.Roles[role_name] current_members = [m.MemberName for m in role.Members] @@ -4660,6 +4763,7 @@ def add_role_member(self, role_name: str, member: str | List[str]): rm = TOM.ExternalModelRoleMember() rm.IdentityProvider = "AzureAD" rm.MemberName = m + rm.MemberType = System.Enum.Parse(TOM.RoleMemberType, role_member_type) role.Members.Add(rm) print( f"{icons.green_dot} '{m}' has been added as a member of the '{role_name}' role." @@ -4698,8 +4802,536 @@ def remove_role_member(self, role_name: str, member: str | List[str]): f"{icons.yellow_dot} '{m}' is not a member of the '{role_name}' role." ) + def get_bim(self) -> dict: + """ + Retrieves the .bim file for the semantic model. + + Returns + ------- + dict + The .bim file. + """ + + import Microsoft.AnalysisServices.Tabular as TOM + + bim = ( + json.loads(TOM.JsonScripter.ScriptCreate(self.model.Database)) + .get("create") + .get("database") + ) + + return bim + + def _reduce_model(self, perspective_name: str): + """ + Reduces a model's objects based on a perspective. Adds the dependent objects within a perspective to that perspective. + """ + + import Microsoft.AnalysisServices.Tabular as TOM + from sempy_labs._model_dependencies import get_model_calc_dependencies + + fabric.refresh_tom_cache(workspace=self._workspace_id) + dfP = fabric.list_perspectives( + dataset=self._dataset_id, workspace=self._workspace_id + ) + dfP = dfP[dfP["Perspective Name"] == perspective_name] + if dfP.empty: + raise ValueError( + f"{icons.red_dot} The '{perspective_name}' is not a valid perspective in the '{self._dataset_name}' semantic model within the '{self._workspace_name}' workspace." 
+ ) + + dep = get_model_calc_dependencies( + dataset=self._dataset_id, workspace=self._workspace_id + ) + dep_filt = dep[ + dep["Object Type"].isin( + [ + "Rows Allowed", + "Measure", + "Calc Item", + "Calc Column", + "Calc Table", + "Hierarchy", + ] + ) + ] + + tables = dfP[dfP["Object Type"] == "Table"]["Table Name"].tolist() + measures = dfP[dfP["Object Type"] == "Measure"]["Object Name"].tolist() + columns = dfP[dfP["Object Type"] == "Column"][["Table Name", "Object Name"]] + cols = [ + f"'{row[0]}'[{row[1]}]" + for row in columns.itertuples(index=False, name=None) + ] + hierarchies = dfP[dfP["Object Type"] == "Hierarchy"][ + ["Table Name", "Object Name"] + ] + hier = [ + f"'{row[0]}'[{row[1]}]" + for row in hierarchies.itertuples(index=False, name=None) + ] + filt = dep_filt[ + (dep_filt["Object Type"].isin(["Rows Allowed", "Calc Item"])) + | (dep_filt["Object Type"] == "Measure") + & (dep_filt["Object Name"].isin(measures)) + | (dep_filt["Object Type"] == "Calc Table") + & (dep_filt["Object Name"].isin(tables)) + | ( + (dep_filt["Object Type"].isin(["Calc Column"])) + & ( + dep_filt.apply( + lambda row: f"'{row['Table Name']}'[{row['Object Name']}]", + axis=1, + ).isin(cols) + ) + ) + | ( + (dep_filt["Object Type"].isin(["Hierarchy"])) + & ( + dep_filt.apply( + lambda row: f"'{row['Table Name']}'[{row['Object Name']}]", + axis=1, + ).isin(hier) + ) + ) + ] + + result_df = pd.DataFrame(columns=["Table Name", "Object Name", "Object Type"]) + + def add_to_result(table_name, object_name, object_type, dataframe): + + new_data = { + "Table Name": table_name, + "Object Name": object_name, + "Object Type": object_type, + } + + return pd.concat( + [dataframe, pd.DataFrame(new_data, index=[0])], ignore_index=True + ) + + for _, r in filt.iterrows(): + added = False + obj_type = r["Referenced Object Type"] + table_name = r["Referenced Table"] + object_name = r["Referenced Object"] + if obj_type in ["Column", "Attribute Hierarchy"]: + obj = self.model.Tables[table_name].Columns[object_name] + if not self.in_perspective( + object=obj, perspective_name=perspective_name + ): + self.add_to_perspective( + object=obj, perspective_name=perspective_name, include_all=False + ) + added = True + elif obj_type == "Measure": + obj = self.model.Tables[table_name].Measures[object_name] + if not self.in_perspective( + object=obj, perspective_name=perspective_name + ): + self.add_to_perspective( + object=obj, perspective_name=perspective_name, include_all=False + ) + added = True + elif obj_type == "Table": + obj = self.model.Tables[table_name] + if not self.in_perspective( + object=obj, perspective_name=perspective_name + ): + self.add_to_perspective( + object=obj, perspective_name=perspective_name, include_all=False + ) + added = True + if added: + result_df = add_to_result(table_name, object_name, obj_type, result_df) + + # Reduce model... 
+ + # Remove unnecessary relationships + for r in self.model.Relationships: + if ( + not self.in_perspective( + object=r.FromTable, perspective_name=perspective_name + ) + ) or ( + not self.in_perspective( + object=r.ToTable, perspective_name=perspective_name + ) + ): + self.remove_object(object=r) + + # Ensure relationships in reduced model have base columns + for r in self.model.Relationships: + if not self.in_perspective(r.FromColumn, perspective_name=perspective_name): + self.add_to_perspective( + object=r.FromColumn, perspective_name=perspective_name + ) + + result_df = add_to_result( + r.FromTable.Name, r.FromColumn.Name, "Column", result_df + ) + if not self.in_perspective(r.ToColumn, perspective_name=perspective_name): + table_name = r.ToTable.Name + object_name = r.ToColumn.Name + self.add_to_perspective( + object=r.ToColumn, perspective_name=perspective_name + ) + + result_df = add_to_result( + r.ToTable.Name, r.ToColumn.Name, "Column", result_df + ) + + # Remove objects not in the perspective + for t in self.model.Tables: + if not self.in_perspective(object=t, perspective_name=perspective_name): + self.remove_object(object=t) + else: + for attr in ["Columns", "Measures", "Hierarchies"]: + for obj in getattr(t, attr): + if attr == "Columns" and obj.Type == TOM.ColumnType.RowNumber: + pass + elif not self.in_perspective( + object=obj, perspective_name=perspective_name + ): + self.remove_object(object=obj) + + # Return the objects added to the perspective based on dependencies + return result_df.drop_duplicates() + + def convert_direct_lake_to_import( + self, + table_name: str, + entity_name: Optional[str] = None, + schema: Optional[str] = None, + source: Optional[str | UUID] = None, + source_type: str = "Lakehouse", + source_workspace: Optional[str | UUID] = None, + ): + """ + Converts a Direct Lake table's partition to an import-mode partition. + + The entity_name and schema parameters default to using the existing values in the Direct Lake partition. The source, source_type, and source_workspace + parameters do not default to existing values. This is because it may not always be possible to reconcile the source and its workspace. + + Parameters + ---------- + table_name : str + The table name. + entity_name : str, default=None + The entity name of the Direct Lake partition (the table name in the source). + schema : str, default=None + The schema of the source table. Defaults to None which resolves to the existing schema. + source : str | uuid.UUID, default=None + The source name or ID. This is the name or ID of the Lakehouse or Warehouse. + source_type : str, default="Lakehouse" + The source type (i.e. "Lakehouse" or "Warehouse"). + source_workspace: str | uuid.UUID, default=None + The workspace name or ID of the source. This is the workspace in which the Lakehouse or Warehouse exists. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. 
+ """ + import Microsoft.AnalysisServices.Tabular as TOM + + p = next(p for p in self.model.Tables[table_name].Partitions) + if p.Mode != TOM.ModeType.DirectLake: + print(f"{icons.info} The '{table_name}' table is not in Direct Lake mode.") + return + + partition_name = p.Name + partition_entity_name = entity_name or p.Source.EntityName + partition_schema = schema or p.Source.SchemaName + + # Update name of the Direct Lake partition (will be removed later) + self.model.Tables[table_name].Partitions[ + partition_name + ].Name = f"{partition_name}_remove" + + source_workspace_id = resolve_workspace_id(workspace=source_workspace) + if source_type == "Lakehouse": + item_id = resolve_lakehouse_id( + lakehouse=source, workspace=source_workspace_id + ) + else: + item_id = resolve_item_id( + item=source, type=source_type, workspace=source_workspace_id + ) + + def _generate_m_expression( + workspace_id, artifact_id, artifact_type, table_name, schema_name + ): + """ + Generates the M expression for the import partition. + """ + + if artifact_type == "Lakehouse": + type_id = "lakehouseId" + elif artifact_type == "Warehouse": + type_id = "warehouseId" + else: + raise NotImplementedError + + full_table_name = ( + f"{schema_name}.{table_name}" if schema_name else table_name + ) + + return f"""let + Source = {artifact_type}.Contents(null), + #"Workspace" = Source{{[workspaceId="{workspace_id}"]}}[Data], + #"Artifact" = #"Workspace"{{[{type_id}="{artifact_id}"]}}[Data], + result = #"Artifact"{{[Id="{full_table_name}",ItemKind="Table"]}}[Data] + in + result + """ + + m_expression = _generate_m_expression( + source_workspace_id, + item_id, + source_type, + partition_entity_name, + partition_schema, + ) + + # Add the import partition + self.add_m_partition( + table_name=table_name, + partition_name=f"{partition_name}", + expression=m_expression, + mode="Import", + ) + # Remove the Direct Lake partition + self.remove_object(object=p) + + print( + f"{icons.green_dot} The '{table_name}' table has been converted to Import mode." + ) + + def copy_object( + self, + object, + target_dataset: str | UUID, + target_workspace: Optional[str | UUID] = None, + readonly: bool = False, + ): + """ + Copies a semantic model object from the current semantic model to the target semantic model. + + Parameters + ---------- + object : TOM Object + The TOM object to be copied to the target semantic model. For example: tom.model.Tables['Sales']. + target_dataset : str | uuid.UUID + Name or ID of the target semantic model. + target_workspace : str | uuid.UUID, default=None + The Fabric workspace name or ID. + Defaults to None which resolves to the workspace of the attached lakehouse + or if no lakehouse attached, resolves to the workspace of the notebook. + readonly : bool, default=False + Whether the connection is read-only or read/write. Setting this to False enables read/write which saves the changes made back to the server. 
+        """
+
+        import Microsoft.AnalysisServices.Tabular as TOM
+
+        clone = object.Clone()
+        with connect_semantic_model(
+            dataset=target_dataset,
+            workspace=target_workspace,
+            readonly=readonly,
+        ) as target_tom:
+            if isinstance(object, TOM.Table):
+                target_tom.model.Tables.Add(clone)
+            elif isinstance(object, TOM.Column):
+                target_tom.model.Tables[object.Parent.Name].Columns.Add(clone)
+            elif isinstance(object, TOM.Measure):
+                target_tom.model.Tables[object.Parent.Name].Measures.Add(clone)
+            elif isinstance(object, TOM.Hierarchy):
+                target_tom.model.Tables[object.Parent.Name].Hierarchies.Add(clone)
+            elif isinstance(object, TOM.Level):
+                target_tom.model.Tables[object.Parent.Parent.Name].Hierarchies[
+                    object.Parent.Name
+                ].Levels.Add(clone)
+            elif isinstance(object, TOM.Role):
+                target_tom.model.Roles.Add(clone)
+            elif isinstance(object, TOM.Relationship):
+                target_tom.model.Relationships.Add(clone)
+            else:
+                raise NotImplementedError(
+                    f"{icons.red_dot} The '{object.ObjectType}' object type is not supported."
+                )
+            print(
+                f"{icons.green_dot} The '{object.Name}' {str(object.ObjectType).lower()} has been copied to the '{target_dataset}' semantic model within the '{target_workspace}' workspace."
+            )
+
+    def format_dax(
+        self,
+        object: Optional[
+            Union[
+                "TOM.Measure",
+                "TOM.CalculatedColumn",
+                "TOM.CalculationItem",
+                "TOM.CalculatedTable",
+                "TOM.TablePermission",
+            ]
+        ] = None,
+    ):
+        """
+        Formats the DAX expressions of measures, calculated columns, calculation items, calculated tables and row level security expressions in the semantic model.
+
+        This function uses the `DAX Formatter API `_.
+
+        Parameters
+        ----------
+        object : TOM Object, default=None
+            The TOM object to format. If None, formats all measures, calculated columns, calculation items, calculated tables and row level security expressions in the semantic model.
+            If a specific object is provided, only that object will be formatted.
+ """ + + import Microsoft.AnalysisServices.Tabular as TOM + + if object is None: + object_map = { + "measures": self.all_measures, + "calculated_columns": self.all_calculated_columns, + "calculation_items": self.all_calculation_items, + "calculated_tables": self.all_calculated_tables, + "rls": self.all_rls, + } + + for key, func in object_map.items(): + for obj in func(): + if key == "calculated_tables": + p = next(p for p in obj.Partitions) + name = obj.Name + expr = p.Source.Expression + table = obj.Name + elif key == "calculation_items": + name = obj.Name + expr = obj.Expression + table = obj.Parent.Table.Name + elif key == "rls": + name = obj.Role.Name + expr = obj.FilterExpression + table = obj.Table.Name + else: + name = obj.Name + expr = obj.Expression + table = obj.Table.Name + self._dax_formatting[key].append( + { + "name": name, + "expression": expr, + "table": table, + } + ) + return + + if object.ObjectType == TOM.ObjectType.Measure: + self._dax_formatting["measures"].append( + { + "name": object.Name, + "expression": object.Expression, + "table": object.Parent.Name, + } + ) + elif object.ObjectType == TOM.ObjectType.CalculatedColumn: + self._dax_formatting["measures"].append( + { + "name": object.Name, + "expression": object.Expression, + "table": object.Parent.Name, + } + ) + elif object.ObjectType == TOM.ObjectType.CalculationItem: + self._dax_formatting["measures"].append( + { + "name": object.Name, + "expression": object.Expression, + "table": object.Parent.Name, + } + ) + elif object.ObjectType == TOM.ObjectType.CalculatedTable: + self._dax_formatting["measures"].append( + { + "name": object.Name, + "expression": object.Expression, + "table": object.Name, + } + ) + else: + raise ValueError( + f"{icons.red_dot} The '{str(object.ObjectType)}' object type is not supported for DAX formatting." 
+ ) + def close(self): + # DAX Formatting + from sempy_labs._daxformatter import _format_dax + + def _process_dax_objects(object_type, model_accessor=None): + items = self._dax_formatting.get(object_type, []) + if not items: + return False + + # Extract and format expressions + expressions = [item["expression"] for item in items] + metadata = [ + {"name": item["name"], "table": item["table"], "type": object_type} + for item in items + ] + + formatted_expressions = _format_dax(expressions, metadata=metadata) + + # Update the expressions in the original structure + for item, formatted in zip(items, formatted_expressions): + item["expression"] = formatted + + # Apply updated expressions to the model + for item in items: + table_name = ( + item["table"] + if object_type != "calculated_tables" + else item["name"] + ) + name = item["name"] + expression = item["expression"] + + if object_type == "calculated_tables": + t = self.model.Tables[table_name] + p = next(p for p in t.Partitions) + p.Source.Expression = expression + elif object_type == "rls": + self.model.Roles[name].TablePermissions[ + table_name + ].FilterExpression = expression + elif object_type == "calculation_items": + self.model.Tables[table_name].CalculationGroup.CalculationItems[ + name + ].Expression = expression + else: + getattr(self.model.Tables[table_name], model_accessor)[ + name + ].Expression = expression + return True + + # Use the helper for each object type + a = _process_dax_objects("measures", "Measures") + b = _process_dax_objects("calculated_columns", "Columns") + c = _process_dax_objects("calculation_items") + d = _process_dax_objects("calculated_tables") + e = _process_dax_objects("rls") + if any([a, b, c, d, e]) and not self._readonly: + from IPython.display import display, HTML + + html = """ + + CODE BEAUTIFIED WITH + + + DAX FORMATTER + + """ + + display(HTML(html)) + if not self._readonly and self.model is not None: import Microsoft.AnalysisServices.Tabular as TOM @@ -4711,18 +5343,25 @@ def close(self): p.SourceType == TOM.PartitionSourceType.Entity for p in t.Partitions ): - if t.LineageTag in list(self._table_map.keys()): - if self._table_map.get(t.LineageTag) != t.Name: - self.add_changed_property(object=t, property="Name") + entity_name = next(p.Source.EntityName for p in t.Partitions) + if t.Name != entity_name: + self.add_changed_property(object=t, property="Name") + # if t.LineageTag in list(self._table_map.keys()): + # if self._table_map.get(t.LineageTag) != t.Name: + # self.add_changed_property(object=t, property="Name") for c in self.all_columns(): + # if c.LineageTag in list(self._column_map.keys()): + if any( + p.SourceType == TOM.PartitionSourceType.Entity + for p in c.Parent.Partitions + ): + if c.Name != c.SourceColumn: + self.add_changed_property(object=c, property="Name") + # c.SourceLineageTag = c.SourceColumn + # if self._column_map.get(c.LineageTag)[0] != c.Name: + # self.add_changed_property(object=c, property="Name") if c.LineageTag in list(self._column_map.keys()): - if any( - p.SourceType == TOM.PartitionSourceType.Entity - for p in c.Parent.Partitions - ): - if self._column_map.get(c.LineageTag)[0] != c.Name: - self.add_changed_property(object=c, property="Name") if self._column_map.get(c.LineageTag)[1] != c.DataType: self.add_changed_property(object=c, property="DataType") @@ -4787,6 +5426,7 @@ def connect_semantic_model( If connecting to Azure Analysis Services, enter the workspace parameter in the following format: 'asazure://.asazure.windows.net/'. 
Defaults to None which resolves to the workspace of the attached lakehouse or if no lakehouse attached, resolves to the workspace of the notebook. + Returns ------- typing.Iterator[TOMWrapper] diff --git a/tests/test_shortcuts.py b/tests/test_shortcuts.py deleted file mode 100644 index fa1844ea..00000000 --- a/tests/test_shortcuts.py +++ /dev/null @@ -1,57 +0,0 @@ -import pandas as pd -from sempy_labs.lakehouse._shortcuts import create_shortcut_onelake -from unittest.mock import MagicMock, PropertyMock, patch - - -@patch("sempy.fabric.resolve_item_name") -@patch("sempy.fabric.list_items") -@patch("sempy.fabric.resolve_workspace_id") -@patch("sempy.fabric.resolve_item_id") -@patch("sempy.fabric.FabricRestClient") -def test_create_shortcut_onelake(fabric_rest_client_mock, resolve_item_id_mock, resolve_workspace_id_mock, list_items_mock, resolve_item_name_mock): - # prepare mocks - def resolve_workspace_id_mock_side_effect(workspace_name): - if workspace_name == "source_workspace": - return "00000000-0000-0000-0000-000000000001" - - if workspace_name == "destination_workspace": - return "00000000-0000-0000-0000-000000000002" - - assert False, f"Unexpected workspace: {workspace_name}" - - resolve_workspace_id_mock.side_effect = resolve_workspace_id_mock_side_effect - - resolve_item_id_mock.return_value = "00000000-0000-0000-0000-00000000000A" - resolve_item_name_mock.return_value = "My item" - - def list_items_side_effect(type, workspace): - assert type == "Lakehouse" - - if workspace == "source_workspace": - return pd.DataFrame([{ - "Display Name": "source_lakehouse_id", - "Id": "10000000-0000-0000-0000-000000000001" - }]) - - if workspace == "destination_workspace": - return pd.DataFrame([{ - "Display Name": "destination_lakehouse_id", - "Id": "20000000-0000-0000-0000-000000000002" - }]) - - assert False, f"Unexpected workspace: {workspace}" - - list_items_mock.side_effect = list_items_side_effect - - def post_side_effect(url, json): - # TODO: we could validate the URL and JSON? 
- - response = MagicMock() - type(response).status_code = PropertyMock(return_value=201) - - return response - - fabric_rest_client_mock.return_value.post.side_effect = post_side_effect - - # execute - create_shortcut_onelake("table_name", "source_lakehouse", "source_workspace", "destination_lakehouse", "destination_workspace", "shortcut_name") diff --git a/tests/test_tom.py b/tests/test_tom.py deleted file mode 100644 index 0bab35eb..00000000 --- a/tests/test_tom.py +++ /dev/null @@ -1,40 +0,0 @@ -import os -import sempy.fabric -from unittest.mock import patch -from sempy_labs.tom import connect_semantic_model -import os - - -@patch("sempy.fabric.resolve_item_id") -@patch("sempy.fabric.resolve_workspace_id") -@patch("sempy_labs._helper_functions.resolve_dataset_name_and_id") -@patch("sempy_labs._helper_functions.resolve_workspace_name_and_id") -@patch("sempy.fabric.create_tom_server") -def test_tom_wrapper(create_tom_server, resolve_workspace_name_and_id, resolve_dataset_name_and_id, resolve_workspace_id, resolve_item_id): - - os.environ["DOTNET_SYSTEM_GLOBALIZATION_INVARIANT"] = "true" - sempy.fabric._client._utils._init_analysis_services() - import Microsoft.AnalysisServices.Tabular as TOM - - resolve_workspace_name_and_id.return_value = ("my_workspace", "my_workspace_id") - resolve_dataset_name_and_id.return_value = ("my_dataset", "my_dataset_id") - resolve_workspace_id.return_value = "my_workspace_id" - resolve_item_id.return_value = "my_dataset_id" - - # create dummy server, database and model - tom_server = TOM.Server() - - db = TOM.Database() - db.Name = "my_dataset" - db.ID = "my_dataset_id" - db.Model = TOM.Model() - tom_server.Databases.Add(db) - - create_tom_server.return_value = tom_server - - # invoke the wrapper - with connect_semantic_model(dataset="my_dataset_id", workspace="my_workspace") as tom: - tom.add_table("my_table") - - # validate the result - assert tom_server.Databases["my_dataset_id"].Model.Tables["my_table"] is not None
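
A minimal usage sketch of the TOMWrapper additions in this series (the role_member_type parameter on add_role_member, has_incremental_refresh_policy accepting a TOM object, and format_dax). The dataset, workspace, role, member, and table names are placeholders, not values taken from this patch.

from sempy_labs.tom import connect_semantic_model

# Placeholder dataset/workspace/role/member/table names, for illustration only.
with connect_semantic_model(
    dataset="AdventureWorks", workspace="Demo Workspace", readonly=False
) as tom:
    # New 'role_member_type' parameter: add an Azure AD group rather than a user.
    tom.add_role_member(
        role_name="Reader",
        member="sales-analysts@contoso.com",
        role_member_type="Group",
    )

    # has_incremental_refresh_policy now takes a TOM object (a table or the model).
    if tom.has_incremental_refresh_policy(object=tom.model.Tables["Sales"]):
        print("The 'Sales' table has an incremental refresh policy.")

    # Queue every measure, calculated column, calculation item, calculated table
    # and RLS expression for formatting; with readonly=False the formatted DAX is
    # written back to the model when the context manager closes.
    tom.format_dax()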