diff --git a/notebooks/multimodal/multimodal_dataframe.ipynb b/notebooks/multimodal/multimodal_dataframe.ipynb index 29c2bd468a..b98a5e7337 100644 --- a/notebooks/multimodal/multimodal_dataframe.ipynb +++ b/notebooks/multimodal/multimodal_dataframe.ipynb @@ -92,7 +92,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 1, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -128,6 +128,38 @@ "import bigframes.bigquery as bbq" ] }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import bigframes.bigquery as bbq\n", + "\n", + "def get_runtime_json_str(series, mode=\"R\", with_metadata=False):\n", + " \"\"\"\n", + " Get the runtime (contains signed URL to access GCS data) and apply the\n", + " ToJSONString transformation.\n", + " \n", + " Args:\n", + " series: bigframes.series.Series to operate on.\n", + " mode: \"R\" for read, \"RW\" for read/write.\n", + " with_metadata: Whether to fetch and include blob metadata.\n", + " \"\"\"\n", + " # 1. Optionally fetch metadata\n", + " s = (\n", + " bbq.obj.fetch_metadata(series)\n", + " if with_metadata\n", + " else series\n", + " )\n", + " \n", + " # 2. Retrieve the access URL runtime object\n", + " runtime = bbq.obj.get_access_url(s, mode=mode)\n", + " \n", + " # 3. 
Convert the runtime object to a JSON string\n", + " return bbq.to_json_string(runtime)" + ] + }, { "cell_type": "markdown", "metadata": { @@ -1290,22 +1322,11 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 3, "metadata": { "id": "oDDuYtUm5Yiy" }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" - ] - } - ], + "outputs": [], "source": [ "df_pdf = bpd.from_glob_path(\"gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/documents/*\", name=\"pdf\")" ] @@ -1464,7 +1485,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -1474,7 +1495,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -1486,26 +1507,9 @@ "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" ] - }, - { - "data": { - "text/html": [ - "
0 Now, as all books, not primarily intended as p..." - ], - "text/plain": [ - "0 Now, as all books, not primarily intended as p...\n", - "Name: transcribed_content, dtype: string" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" } ], "source": [ - "import bigframes.bigquery as bbq\n", - "import bigframes.operations as ops\n", - "\n", "# The audio_transcribe function is a convenience wrapper around bigframes.bigquery.ai.generate.\n", "# Here's how to perform the same operation directly:\n", "\n", @@ -1519,8 +1523,8 @@ "\n", "# Convert the audio series to the runtime representation required by the model.\n", "# This involves fetching metadata and getting a signed access URL.\n", - "audio_metadata = audio_series._apply_unary_op(ops.obj_fetch_metadata_op)\n", - "audio_runtime = audio_metadata._apply_unary_op(ops.ObjGetAccessUrl(mode=\"R\"))\n", + "audio_metadata = bbq.obj.fetch_metadata(audio_series)\n", + "audio_runtime = bbq.obj.get_access_url(audio_metadata, mode=\"R\")\n", "\n", "transcribed_results = bbq.ai.generate(\n", " prompt=(prompt_text, audio_runtime),\n", @@ -1534,7 +1538,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -1638,7 +1642,7 @@ "\n", "# Generate a JSON string containing the runtime information (including signed read URLs)\n", "# This allows the UDF to download the images from Google Cloud Storage\n", - "access_urls = exif_image_df[\"blob_col\"].blob.get_runtime_json_str(mode=\"R\")\n", + "access_urls = get_runtime_json_str(exif_image_df[\"blob_col\"], mode=\"R\")\n", "\n", "# Apply the BigQuery Python UDF to the runtime JSON strings\n", "# We cast to string to ensure the input matches the UDF's signature\n",