Commit f085372 (parent 9bcd86d)

Copy Files recursively to AIDP from Databricks Folder.

2 files changed: +89 −0 lines changed
Lines changed: 88 additions & 0 deletions

@@ -0,0 +1,88 @@
{
  "metadata": {
    "kernelspec": {
      "name": "notebook"
    },
    "language_info": {
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python"
    },
    "Last_Active_Cell_Index": 5
  },
  "nbformat_minor": 5,
  "nbformat": 4,
  "cells": [
    {
      "id": "aedb2989-04d1-4e7d-894d-aff632ce0297",
      "cell_type": "markdown",
      "source": "Oracle AI Data Platform v1.0\n\nCopyright © 2025, Oracle and/or its affiliates.\n\nLicensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/",
      "metadata": {
        "type": "markdown"
      }
    },
    {
      "id": "a7f00219-2c74-4cba-a12b-67d2eb829168",
      "cell_type": "markdown",
      "source": "### Sample Code: Exporting Databricks Files to AIDP\n\nThis example demonstrates how to export files recursively from a Databricks workspace using the `databricks-sdk` library and write them to an **AIDP** path.\n\n**Note:**\n\n- Replace all placeholders (e.g., `<DATABRICKS_WORKSPACE_URL>`, `<DATABRICKS_TOKEN>`, `<DATABRICKS_PATH>`, `<AIDP_PATH>`) with values specific to your environment before running the notebook.\n- Optionally provide source-to-target string replacements to apply to file contents while importing to AIDP.\n- Use with caution: the notebook is designed for exporting notebooks and code-related files only.",
      "metadata": {
        "type": "markdown"
      }
    },
    {
      "id": "6df91459-a48a-4920-9d16-973c61bee150",
      "cell_type": "code",
      "source": "import os\nimport base64\nfrom databricks.sdk import WorkspaceClient\nfrom databricks.sdk.service import workspace",
      "metadata": {
        "type": "python",
        "trusted": true
      },
      "outputs": [],
      "execution_count": null
    },
    {
      "id": "4a233ed1-458c-47bd-9444-63622ea8cf6b",
      "cell_type": "code",
      "source": "# Databricks workspace URL\ndatabricks_workspace_url = \"DATABRICKS_WORKSPACE_URL\"\n# Databricks token\ndatabricks_token = \"DATABRICKS_TOKEN\"\n# Define the Databricks folder you want to export\ndatabricks_path = \"DATABRICKS_PATH\"\n# Define the local AIDP directory to write the exported content\naidp_path = \"AIDP_PATH\"",
      "metadata": {
        "type": "python",
        "trusted": true
      },
      "outputs": [],
      "execution_count": null
    },
    {
      "id": "4b8ab52e-0885-4ae2-8864-5b7563d20b79",
      "cell_type": "code",
      "source": "# Provide a comma-separated mapping to replace each source string with its target string. These are plain string replacements, so the mapping should be chosen carefully.\ndbx_to_aidp_replacement_mappings = {\n    \"SOURCE_STR_1\": \"TARGET_STR_1\",\n    \"SOURCE_STR_2\": \"TARGET_STR_2\"\n}",
      "metadata": {
        "type": "python",
        "trusted": true
      },
      "outputs": [],
      "execution_count": null
    },
    {
      "id": "c9ae41ae-4e57-4fd9-8a59-f360a3cb60ad",
      "cell_type": "code",
      "source": "# Recursively exports a Databricks workspace folder to a local directory, preserving the nested folder structure and exporting notebooks as .ipynb files.\n\ndef export_folder_recursively(databricks_path: str, aidp_path: str, w: WorkspaceClient):\n\n    try:\n        # List contents of the current workspace path\n        contents = w.workspace.list(path=databricks_path)\n    except Exception as e:\n        print(f\"Failed to list contents of Databricks path {databricks_path}: {e}\")\n        return\n\n    for item in contents:\n        dbx_item_path = item.path\n\n        # Determine the relative path to maintain the nested structure\n        dbx_relative_path = os.path.relpath(dbx_item_path, databricks_path)\n        aidp_full_path = os.path.join(aidp_path, dbx_relative_path)\n\n        if item.object_type == workspace.ObjectType.DIRECTORY:\n            # Create the local directory and recurse into it\n            os.makedirs(aidp_full_path, exist_ok=True)\n            print(f\"Created local directory: {aidp_full_path}\")\n            export_folder_recursively(dbx_item_path, aidp_full_path, w)\n        elif item.object_type in (workspace.ObjectType.FILE, workspace.ObjectType.NOTEBOOK):\n            file_name = os.path.basename(dbx_item_path)\n            if item.object_type == workspace.ObjectType.NOTEBOOK:\n                local_file_path = os.path.join(os.path.dirname(aidp_full_path), f\"{file_name}.ipynb\")\n                export_format = workspace.ExportFormat.JUPYTER\n            else:\n                local_file_path = os.path.join(os.path.dirname(aidp_full_path), file_name)\n                export_format = workspace.ExportFormat.SOURCE\n\n            try:\n                # Export the file/notebook content\n                print(f\"Exporting File/Notebook: {dbx_item_path} to {local_file_path}\")\n                dbx_file_content = w.workspace.export(\n                    path=dbx_item_path,\n                    format=export_format\n                )\n\n                # The exported content is base64-encoded\n                binary_content = base64.b64decode(dbx_file_content.content)\n                code_string = binary_content.decode('utf-8')\n\n                # Iterate through the mapping and replace content\n                for dbx_str, aidp_str in dbx_to_aidp_replacement_mappings.items():\n                    code_string = code_string.replace(dbx_str, aidp_str)\n\n                modified_binary_content = code_string.encode('utf-8')\n\n                with open(local_file_path, \"wb\") as f:\n                    f.write(modified_binary_content)\n\n                print(f\"Downloaded File: {file_name} as {local_file_path}\")\n\n            except Exception as export_error:\n                print(f\"Failed to export notebook {dbx_item_path}: {export_error}\")\n\n        else:\n            print(f\"Skipping unsupported object type: {item.object_type} at {dbx_item_path}\")",
      "metadata": {
        "type": "python",
        "trusted": true
      },
      "outputs": [],
      "execution_count": null
    },
    {
      "id": "adaeed13-c355-4503-90bc-9aa8262c30cb",
      "cell_type": "code",
      "source": "# Initialize the WorkspaceClient\nw = WorkspaceClient(\n    host=databricks_workspace_url,\n    token=databricks_token,\n)\n\nprint(f\"Starting export from Databricks path '{databricks_path}' to local path '{aidp_path}'\")\n\n# Create the AIDP local directory if it does not exist.\nos.makedirs(aidp_path, exist_ok=True)\n\n# Start the recursive export\nexport_folder_recursively(databricks_path, aidp_path, w)\n\nprint(\"\\nExport process finished.\")",
      "metadata": {
        "type": "python",
        "trusted": true
      },
      "outputs": [],
      "execution_count": null
    }
  ]
}
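The string-replacement step in the notebook above can be exercised in isolation. A minimal, self-contained sketch (the mapping entries and the sample payload below are hypothetical, not from this commit): decode the base64 content an export would return, apply each mapping in order, then re-encode for writing.

```python
import base64

# Hypothetical mappings: each key is a literal source string, each value its
# AIDP replacement. These are plain string replacements, so order and
# specificity of the keys matter.
dbx_to_aidp_replacement_mappings = {
    "dbfs:/mnt/raw": "/aidp/raw",
    "spark.table": "session.table",
}

# Stand-in for dbx_file_content.content: the workspace export returns
# base64-encoded bytes, mimicked here with a sample two-line script.
exported_b64 = base64.b64encode(
    b"df = spark.table('sales')\npath = 'dbfs:/mnt/raw'"
).decode("ascii")

code_string = base64.b64decode(exported_b64).decode("utf-8")
for dbx_str, aidp_str in dbx_to_aidp_replacement_mappings.items():
    code_string = code_string.replace(dbx_str, aidp_str)

modified_binary_content = code_string.encode("utf-8")
print(code_string)
# df = session.table('sales')
# path = '/aidp/raw'
```

Because the replacements are positional substring matches, a short key such as `spark.table` would also rewrite any longer identifier containing it, which is the reason for the "provide the mapping carefully" warning in the notebook.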
Lines changed: 1 addition & 0 deletions

@@ -0,0 +1 @@
databricks-sdk
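The recursion pattern in `export_folder_recursively` (list a directory, recreate it locally with `os.makedirs(..., exist_ok=True)`, recurse into subdirectories, copy files at the leaves) can be tried without a live workspace. A local-filesystem analogue using only the standard library (the directory and file names are made up for illustration):

```python
import os
import shutil
import tempfile

def copy_tree_recursively(src_root: str, dst_root: str) -> None:
    # Mirrors the notebook's traversal: each sub-directory is recreated
    # under dst_root before recursing, so the nested layout is preserved;
    # plain files are copied at the leaves.
    for entry in os.listdir(src_root):
        src_path = os.path.join(src_root, entry)
        dst_path = os.path.join(dst_root, entry)
        if os.path.isdir(src_path):
            os.makedirs(dst_path, exist_ok=True)
            copy_tree_recursively(src_path, dst_path)
        else:
            shutil.copyfile(src_path, dst_path)

# Build a throwaway source tree and "export" it.
src = tempfile.mkdtemp()
dst = tempfile.mkdtemp()
os.makedirs(os.path.join(src, "etl", "jobs"))
with open(os.path.join(src, "etl", "jobs", "load.py"), "w") as f:
    f.write("print('hello')\n")

copy_tree_recursively(src, dst)
print(os.path.isfile(os.path.join(dst, "etl", "jobs", "load.py")))  # True
```

The notebook's version differs only in the listing and read primitives: `w.workspace.list` instead of `os.listdir`, and `w.workspace.export` (plus base64 decoding and the string replacements) instead of `shutil.copyfile`.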
