From ad95a1c1a68d23d7fbaef6c72dc1bf07a3f116a8 Mon Sep 17 00:00:00 2001 From: Erik Hatcher Date: Wed, 19 Nov 2025 13:41:29 -0500 Subject: [PATCH 1/3] Initial commit: RRF and RSF demos --- notebooks/advanced_techniques/README.md | 1 + .../advanced_techniques/rrf_rsf_demo.ipynb | 2719 +++++++++++++++++ 2 files changed, 2720 insertions(+) create mode 100644 notebooks/advanced_techniques/rrf_rsf_demo.ipynb diff --git a/notebooks/advanced_techniques/README.md b/notebooks/advanced_techniques/README.md index bb87f40..3c208dd 100644 --- a/notebooks/advanced_techniques/README.md +++ b/notebooks/advanced_techniques/README.md @@ -12,3 +12,4 @@ Jupyter Notebooks that cover advanced techniques such as vector quantization, pa | Quantized Vector Ingestion | MongoDB Atlas, Cohere | [![View Notebook](https://img.shields.io/badge/view-notebook-orange?logo=jupyter)](https://github.com/mongodb-developer/GenAI-Showcase/blob/main/notebooks/advanced_techniques/quantized_vector_ingestion_with_cohere_and_mongodb.ipynb) | | Retrieval Strategies with LlamaIndex | MongoDB Atlas, LlamaIndex | [![View Notebook](https://img.shields.io/badge/view-notebook-orange?logo=jupyter)](https://github.com/mongodb-developer/GenAI-Showcase/blob/main/notebooks/advanced_techniques/retrieval_strategies_mongodb_llamaindex.ipynb) | | Retrieval Strategies with Together AI | MongoDB Atlas, LlamaIndex, Together AI | [![View Notebook](https://img.shields.io/badge/view-notebook-orange?logo=jupyter)](https://github.com/mongodb-developer/GenAI-Showcase/blob/main/notebooks/advanced_techniques/retrieval_strategies_mongodb_llamaindex_togetherai.ipynb) | +| Reciprocal Rank Fusion (RRF) and Relative Score Fusion (RSF) Example | MongoDB Atlas | [![View Notebook](https://img.shields.io/badge/view-notebook-orange?logo=jupyter)](https://github.com/mongodb-developer/GenAI-Showcase/blob/main/notebooks/advanced_techniques/rrf_rsf_demo.ipynb) | diff --git a/notebooks/advanced_techniques/rrf_rsf_demo.ipynb 
b/notebooks/advanced_techniques/rrf_rsf_demo.ipynb new file mode 100644 index 0000000..7010513 --- /dev/null +++ b/notebooks/advanced_techniques/rrf_rsf_demo.ipynb @@ -0,0 +1,2719 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "kQ2aeJ-EoKPu" + }, + "source": [ + "# Recipocal Rank Fusion (RRF) and Relative Score Fusion (RSF) Demostration\n", + "\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/mongodb-developer/GenAI-Showcase/blob/main/notebooks/advanced_techniques/rrf_rsf_demo.ipynb)\n", + "\n", + "\n", + "You can view an article that explains concepts in this notebook:\n", + "\n", + "[![View Article](https://img.shields.io/badge/View%20Article-blue)](https://mdb.link/rrf_rsf_demo)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "collapsed": true, + "id": "-zYwYS4dTAeI", + "outputId": "a57f35bb-28fa-4412-cd46-d66f6594878e" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting pymongo\n", + " Downloading pymongo-4.15.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl.metadata (22 kB)\n", + "Collecting dnspython<3.0.0,>=1.16.0 (from pymongo)\n", + " Downloading dnspython-2.8.0-py3-none-any.whl.metadata (5.7 kB)\n", + "Downloading pymongo-4.15.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl (1.7 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m24.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading dnspython-2.8.0-py3-none-any.whl (331 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m331.1/331.1 kB\u001b[0m \u001b[31m20.9 MB/s\u001b[0m eta 
\u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hInstalling collected packages: dnspython, pymongo\n", + "Successfully installed dnspython-2.8.0 pymongo-4.15.4\n" + ] + } + ], + "source": [ + "!pip install pymongo\n", + "import pymongo\n", + "from pymongo import MongoClient\n", + "\n", + "import pandas as pd\n", + "\n", + "from google.colab import userdata\n", + "from google.colab import data_table\n", + "#?? data_table.enable_dataframe_formatter() # Enable interactive data table display (run this once per notebook)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "7KQkoFy-Wh9r", + "outputId": "29b89dd0-9d34-4aa9-9648-a02d63cdcda2" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Connection to MongoDB successful\n" + ] + }, + { + "data": { + "text/plain": [ + "InsertManyResult([1, 2, 3, 4, 5, 6, 7, 8, 9], acknowledged=True)" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def get_mongo_client(mongo_uri):\n", + " \"\"\"Establish connection to MongoDB\"\"\"\n", + " try:\n", + " client = pymongo.MongoClient(mongo_uri, appname=\"devrel.blueprint.hybrid\")\n", + " print(\"Connection to MongoDB successful\")\n", + " return client\n", + " except pymongo.errors.ConnectionFailure as e:\n", + " print(f\"Connection failed: {e}\")\n", + " return None\n", + "\n", + "mongodb_uri = userdata.get('MONGODB_URI')\n", + "if not mongodb_uri:\n", + " print(\"MONGODB_URI not set in environment variables\")\n", + "\n", + "client = get_mongo_client(mongodb_uri)\n", + "\n", + "collection = client['test']['simple_fusion']\n", + "collection.delete_many({})\n", + "\n", + "data = [{\n", + " \"_id\": 1,\n", + " \"name\": \"Yummy Grub\",\n", + " \"distance\": 2,\n", + " \"rating\": 4.1\n", + "},\n", + "{\n", + " \"_id\": 2,\n", + " \"name\": \"Hao Chi Fan\",\n", + " \"distance\": 15,\n", + " \"rating\": 
4.9\n", + "},\n", + "{\n", + " \"_id\": 3,\n", + " \"name\": \"All Daysayuno\",\n", + " \"distance\": 5,\n", + " \"rating\": 4.3\n", + "},\n", + "{\n", + " \"_id\": 4,\n", + " \"name\": \"Soup for Supper\",\n", + " \"distance\": 3,\n", + " \"rating\": 3.5\n", + "},\n", + "{\n", + " \"_id\": 5,\n", + " \"name\": \"Salada Grande\",\n", + " \"distance\": 6,\n", + " \"rating\": 4.2\n", + "},\n", + "{\n", + " \"_id\": 6,\n", + " \"name\": \"Veggie Bites\",\n", + " \"distance\": 3,\n", + " \"rating\": 4\n", + "},\n", + "{\n", + " \"_id\": 7,\n", + " \"name\": \"Food Fiesta\",\n", + " \"distance\": 1,\n", + " \"rating\": 2.5\n", + "},\n", + "{\n", + " \"_id\": 8,\n", + " \"name\": \"Pizza & Pie\",\n", + " \"distance\": 4,\n", + " \"rating\": 4.4\n", + "},\n", + "{\n", + " \"_id\": 9,\n", + " \"name\": \"Burger Bazaar\",\n", + " \"distance\": 3,\n", + " \"rating\": 4.2\n", + "}]\n", + "\n", + "collection.insert_many(data)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KCMZo5mGod0E" + }, + "source": [ + "# Example documents" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 331 + }, + "id": "m5brM_QqgM18", + "outputId": "87dc5228-4f3a-4b9e-f22d-17861099fef5" + }, + "outputs": [ + { + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "summary": "{\n \"name\": \"pd\",\n \"rows\": 9,\n \"fields\": [\n {\n \"column\": \"_id\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2,\n \"min\": 1,\n \"max\": 9,\n \"num_unique_values\": 9,\n \"samples\": [\n 8,\n 2,\n 6\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"name\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 9,\n \"samples\": [\n \"Pizza & Pie\",\n \"Hao Chi Fan\",\n \"Veggie Bites\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"distance\",\n \"properties\": {\n \"dtype\": \"number\",\n 
\"std\": 4,\n \"min\": 1,\n \"max\": 15,\n \"num_unique_values\": 7,\n \"samples\": [\n 2,\n 15,\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"rating\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.675359986311827,\n \"min\": 2.5,\n \"max\": 4.9,\n \"num_unique_values\": 8,\n \"samples\": [\n 4.9,\n 4.0,\n 4.1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", + "type": "dataframe" + }, + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
_idnamedistancerating
01Yummy Grub24.1
12Hao Chi Fan154.9
23All Daysayuno54.3
34Soup for Supper33.5
45Salada Grande64.2
56Veggie Bites34.0
67Food Fiesta12.5
78Pizza & Pie44.4
89Burger Bazaar34.2
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "text/plain": [ + " _id name distance rating\n", + "0 1 Yummy Grub 2 4.1\n", + "1 2 Hao Chi Fan 15 4.9\n", + "2 3 All Daysayuno 5 4.3\n", + "3 4 Soup for Supper 3 3.5\n", + "4 5 Salada Grande 6 4.2\n", + "5 6 Veggie Bites 3 4.0\n", + "6 7 Food Fiesta 1 2.5\n", + "7 8 Pizza & Pie 4 4.4\n", + "8 9 Burger Bazaar 3 4.2" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "all_docs = collection.find({}).to_list()\n", + "pd.DataFrame(all_docs)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vvSYXtPxJ8H9" + }, + "source": [ + "## Top closest and top rated" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "1QDzV1WmJ5NZ" + }, + "outputs": [], + "source": [ + "top_closest_ranked = [\n", + " {\n", + " '$sort': {\n", + " 'distance': 1, 'rating': -1\n", + " }\n", + " },\n", + " {\n", + " '$limit': 5\n", + " }\n", + "]\n", + "top_closest_results_ranked = collection.aggregate(top_closest_ranked).to_list()\n", + "\n", + "top_rated_ranked = [\n", + " {\n", + " '$sort': {\n", + " 'rating': -1, 'distance': 1\n", + " }\n", + " },\n", + " {\n", + " '$limit': 5\n", + " }\n", + "]\n", + "top_rated_results_ranked = collection.aggregate(top_rated_ranked).to_list()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "Sg30wL8MWjJD", + "outputId": "8d7c71a7-4ff9-4a61-9516-ff6f8e5c8f73" + }, + "outputs": [ + { + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "summary": "{\n \"name\": \"pd\",\n \"rows\": 5,\n \"fields\": [\n {\n \"column\": \"_id\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3,\n \"min\": 1,\n \"max\": 9,\n \"num_unique_values\": 5,\n \"samples\": [\n 1,\n 4,\n 9\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"name\",\n \"properties\": {\n \"dtype\": \"string\",\n 
\"num_unique_values\": 5,\n \"samples\": [\n \"Yummy Grub\",\n \"Soup for Supper\",\n \"Burger Bazaar\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"distance\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 1,\n \"max\": 3,\n \"num_unique_values\": 3,\n \"samples\": [\n 1,\n 2,\n 3\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"rating\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.7021395872616784,\n \"min\": 2.5,\n \"max\": 4.2,\n \"num_unique_values\": 5,\n \"samples\": [\n 4.1,\n 3.5,\n 4.2\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", + "type": "dataframe" + }, + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
_idnamedistancerating
07Food Fiesta12.5
11Yummy Grub24.1
29Burger Bazaar34.2
36Veggie Bites34.0
44Soup for Supper33.5
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "text/plain": [ + " _id name distance rating\n", + "0 7 Food Fiesta 1 2.5\n", + "1 1 Yummy Grub 2 4.1\n", + "2 9 Burger Bazaar 3 4.2\n", + "3 6 Veggie Bites 3 4.0\n", + "4 4 Soup for Supper 3 3.5" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.DataFrame(top_closest_results_ranked)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "YVc6Pqv7A-89", + "outputId": "84f7b9dc-ae1b-4898-bc53-7e2e8ff32c01" + }, + "outputs": [ + { + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "summary": "{\n \"name\": \"pd\",\n \"rows\": 5,\n \"fields\": [\n {\n \"column\": \"_id\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3,\n \"min\": 2,\n \"max\": 9,\n \"num_unique_values\": 5,\n \"samples\": [\n 8,\n 5,\n 3\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"name\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"Pizza & Pie\",\n \"Salada Grande\",\n \"All Daysayuno\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"distance\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 4,\n \"min\": 3,\n \"max\": 15,\n \"num_unique_values\": 5,\n \"samples\": [\n 4,\n 6,\n 5\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"rating\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.29154759474226516,\n \"min\": 4.2,\n \"max\": 4.9,\n \"num_unique_values\": 4,\n \"samples\": [\n 4.4,\n 4.2,\n 4.9\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", + "type": "dataframe" + }, + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
_idnamedistancerating
02Hao Chi Fan154.9
18Pizza & Pie44.4
23All Daysayuno54.3
39Burger Bazaar34.2
45Salada Grande64.2
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "text/plain": [ + " _id name distance rating\n", + "0 2 Hao Chi Fan 15 4.9\n", + "1 8 Pizza & Pie 4 4.4\n", + "2 3 All Daysayuno 5 4.3\n", + "3 9 Burger Bazaar 3 4.2\n", + "4 5 Salada Grande 6 4.2" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.DataFrame(top_rated_results_ranked)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Hrg8SUFkooFE" + }, + "source": [ + "# RRF: Reciprocal Rank Fusion" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 331 + }, + "id": "JAeibXevWoBB", + "outputId": "5e6fe693-ef9e-4a5d-b679-2cb11ea49456" + }, + "outputs": [ + { + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "summary": "{\n \"name\": \"pd\",\n \"rows\": 9,\n \"fields\": [\n {\n \"column\": \"_id\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2,\n \"min\": 1,\n \"max\": 9,\n \"num_unique_values\": 9,\n \"samples\": [\n 3,\n 7,\n 2\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"name\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 9,\n \"samples\": [\n \"All Daysayuno\",\n \"Food Fiesta\",\n \"Hao Chi Fan\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"distance\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 4,\n \"min\": 1,\n \"max\": 15,\n \"num_unique_values\": 7,\n \"samples\": [\n 3,\n 1,\n 5\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"rating\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.675359986311827,\n \"min\": 2.5,\n \"max\": 4.9,\n \"num_unique_values\": 8,\n \"samples\": [\n 2.5,\n 4.9,\n 4.2\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"score\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.17390919344251815,\n \"min\": 
0.46153846153846156,\n \"max\": 1.0243055555555556,\n \"num_unique_values\": 9,\n \"samples\": [\n 0.47619047619047616,\n 0.5737704918032788,\n 0.49180327868852464\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"scoreDetails\",\n \"properties\": {\n \"dtype\": \"object\",\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", + "type": "dataframe" + }, + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
_idnamedistanceratingscorescoreDetails
09Burger Bazaar34.21.024306{'value': 1.0243055555555556, 'description': '...
17Food Fiesta12.50.573770{'value': 0.5737704918032788, 'description': '...
21Yummy Grub24.10.564516{'value': 0.564516129032258, 'description': 'v...
36Veggie Bites34.00.546875{'value': 0.546875, 'description': 'value outp...
44Soup for Supper33.50.538462{'value': 0.5384615384615385, 'description': '...
52Hao Chi Fan154.90.491803{'value': 0.49180327868852464, 'description': ...
68Pizza & Pie44.40.483871{'value': 0.4838709677419355, 'description': '...
73All Daysayuno54.30.476190{'value': 0.47619047619047616, 'description': ...
85Salada Grande64.20.461538{'value': 0.46153846153846156, 'description': ...
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "text/plain": [ + " _id name distance rating score \\\n", + "0 9 Burger Bazaar 3 4.2 1.024306 \n", + "1 7 Food Fiesta 1 2.5 0.573770 \n", + "2 1 Yummy Grub 2 4.1 0.564516 \n", + "3 6 Veggie Bites 3 4.0 0.546875 \n", + "4 4 Soup for Supper 3 3.5 0.538462 \n", + "5 2 Hao Chi Fan 15 4.9 0.491803 \n", + "6 8 Pizza & Pie 4 4.4 0.483871 \n", + "7 3 All Daysayuno 5 4.3 0.476190 \n", + "8 5 Salada Grande 6 4.2 0.461538 \n", + "\n", + " scoreDetails \n", + "0 {'value': 1.0243055555555556, 'description': '... \n", + "1 {'value': 0.5737704918032788, 'description': '... \n", + "2 {'value': 0.564516129032258, 'description': 'v... \n", + "3 {'value': 0.546875, 'description': 'value outp... \n", + "4 {'value': 0.5384615384615385, 'description': '... \n", + "5 {'value': 0.49180327868852464, 'description': ... \n", + "6 {'value': 0.4838709677419355, 'description': '... \n", + "7 {'value': 0.47619047619047616, 'description': ... \n", + "8 {'value': 0.46153846153846156, 'description': ... " + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rrf_results = collection.aggregate([\n", + " {\n", + " '$rankFusion': {\n", + " 'input': {\n", + " 'pipelines': {\n", + " 'distance_pipeline': top_closest_ranked,\n", + " 'rating_pipeline': top_rated_ranked\n", + " }\n", + " },\n", + " 'combination': {\n", + " 'weights': {\n", + " 'distance_pipeline': 35,\n", + " 'rating_pipeline': 30\n", + " }\n", + " },\n", + " 'scoreDetails': True\n", + " }\n", + " },\n", + " {\n", + " '$addFields': {\n", + " 'score': {'$meta': 'score'},\n", + " 'scoreDetails': {'$meta': 'scoreDetails'}\n", + " }\n", + " }\n", + "]).to_list()\n", + "\n", + "pd.DataFrame(rrf_results)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ilTogKseo_Td" + }, + "source": [ + "# Relative Score Fusion" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "id": "RJ6LQ91Xdx40" + }, + "outputs": [], + "source": [ + 
"top_closest_scored = [\n", + " {\n", + " '$score': {\n", + " 'score': {\n", + " '$subtract': [100, {'$multiply': [5.0, \"$distance\"]}]\n", + " },\n", + " 'normalization': 'none',\n", + " }\n", + " },\n", + " {\n", + " '$sort': {\n", + " 'score': {'$meta': 'score'}\n", + " }\n", + " },\n", + " {\n", + " '$limit': 5\n", + " },\n", + "]\n", + "top_closest_results_scored = collection.aggregate(top_closest_scored).to_list()\n", + "\n", + "top_rated_scored = [\n", + " {\n", + " '$score': {\n", + " 'score': '$rating',\n", + " 'normalization': 'none',\n", + " }\n", + " },\n", + " {\n", + " '$sort': {\n", + " 'score': {'$meta': 'score'}\n", + " }\n", + " },\n", + " {\n", + " '$limit': 5\n", + " },\n", + "]\n", + "top_rated_results_scored = collection.aggregate(top_rated_scored).to_list()\n", + "\n", + "rsf_results = collection.aggregate([\n", + " {\n", + " '$scoreFusion': {\n", + " 'input': {\n", + " 'pipelines': {\n", + " 'distance_pipeline': top_closest_scored,\n", + " 'rating_pipeline': top_rated_scored\n", + " },\n", + " 'normalization': 'sigmoid'\n", + " },\n", + " 'combination': {\n", + " 'weights': {\n", + " 'distance_pipeline': 1,\n", + " 'rating_pipeline': 1\n", + " },\n", + " 'method': 'avg'\n", + " },\n", + " 'scoreDetails': True\n", + " }\n", + " },\n", + " {\n", + " '$addFields': {\n", + " 'computed_distance_score': {\n", + " '$subtract': [100, {'$multiply': [5.0, \"$distance\"]}]\n", + " },\n", + " 'score': {'$meta': 'score'},\n", + " 'scoreDetails': {'$meta': 'scoreDetails'}\n", + " }\n", + " }\n", + "]).to_list()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "WRO1iIOXi1z5", + "outputId": "4b723a05-843a-40b3-cc4f-d263f0ce1375" + }, + "outputs": [ + { + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "summary": "{\n \"name\": \"pd\",\n \"rows\": 5,\n \"fields\": [\n {\n \"column\": \"_id\",\n \"properties\": {\n \"dtype\": 
\"number\",\n \"std\": 3,\n \"min\": 1,\n \"max\": 9,\n \"num_unique_values\": 5,\n \"samples\": [\n 1,\n 6,\n 9\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"name\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"Yummy Grub\",\n \"Veggie Bites\",\n \"Burger Bazaar\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"distance\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 1,\n \"max\": 3,\n \"num_unique_values\": 3,\n \"samples\": [\n 1,\n 2,\n 3\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"rating\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.7021395872616784,\n \"min\": 2.5,\n \"max\": 4.2,\n \"num_unique_values\": 5,\n \"samples\": [\n 4.1,\n 4.0,\n 4.2\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", + "type": "dataframe" + }, + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
_idnamedistancerating
07Food Fiesta12.5
11Yummy Grub24.1
29Burger Bazaar34.2
34Soup for Supper33.5
46Veggie Bites34.0
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "text/plain": [ + " _id name distance rating\n", + "0 7 Food Fiesta 1 2.5\n", + "1 1 Yummy Grub 2 4.1\n", + "2 9 Burger Bazaar 3 4.2\n", + "3 4 Soup for Supper 3 3.5\n", + "4 6 Veggie Bites 3 4.0" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.DataFrame(top_closest_results_scored)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "27SB0wztlUEy", + "outputId": "1b239885-7ccf-4c5b-9da7-46079924dad9" + }, + "outputs": [ + { + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "summary": "{\n \"name\": \"pd\",\n \"rows\": 5,\n \"fields\": [\n {\n \"column\": \"_id\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3,\n \"min\": 2,\n \"max\": 9,\n \"num_unique_values\": 5,\n \"samples\": [\n 8,\n 5,\n 3\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"name\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"Pizza & Pie\",\n \"Salada Grande\",\n \"All Daysayuno\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"distance\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 4,\n \"min\": 3,\n \"max\": 15,\n \"num_unique_values\": 5,\n \"samples\": [\n 4,\n 6,\n 5\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"rating\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.29154759474226516,\n \"min\": 4.2,\n \"max\": 4.9,\n \"num_unique_values\": 4,\n \"samples\": [\n 4.4,\n 4.2,\n 4.9\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", + "type": "dataframe" + }, + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
_idnamedistancerating
02Hao Chi Fan154.9
18Pizza & Pie44.4
23All Daysayuno54.3
39Burger Bazaar34.2
45Salada Grande64.2
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "text/plain": [ + " _id name distance rating\n", + "0 2 Hao Chi Fan 15 4.9\n", + "1 8 Pizza & Pie 4 4.4\n", + "2 3 All Daysayuno 5 4.3\n", + "3 9 Burger Bazaar 3 4.2\n", + "4 5 Salada Grande 6 4.2" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.DataFrame(top_rated_results_scored)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 331 + }, + "id": "3KnywlRHmPOX", + "outputId": "ad330723-6b0e-40d6-fb93-f50155518f8d" + }, + "outputs": [ + { + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "summary": "{\n \"name\": \"pd\",\n \"rows\": 9,\n \"fields\": [\n {\n \"column\": \"_id\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2,\n \"min\": 1,\n \"max\": 9,\n \"num_unique_values\": 9,\n \"samples\": [\n 3,\n 1,\n 2\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"name\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 9,\n \"samples\": [\n \"All Daysayuno\",\n \"Yummy Grub\",\n \"Hao Chi Fan\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"distance\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 4,\n \"min\": 1,\n \"max\": 15,\n \"num_unique_values\": 7,\n \"samples\": [\n 3,\n 2,\n 5\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"rating\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.675359986311827,\n \"min\": 2.5,\n \"max\": 4.9,\n \"num_unique_values\": 8,\n \"samples\": [\n 4.1,\n 4.9,\n 4.2\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"computed_distance_score\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 20.766559657295186,\n \"min\": 25.0,\n \"max\": 95.0,\n \"num_unique_values\": 7,\n \"samples\": [\n 85.0,\n 90.0,\n 75.0\n ],\n \"semantic_type\": \"\",\n 
\"description\": \"\"\n }\n },\n {\n \"column\": \"score\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.16522747482214264,\n \"min\": 0.49261298415336346,\n \"max\": 0.9926129841533635,\n \"num_unique_values\": 6,\n \"samples\": [\n 0.9926129841533635,\n 0.5,\n 0.49261298415336346\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"scoreDetails\",\n \"properties\": {\n \"dtype\": \"object\",\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", + "type": "dataframe" + }, + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
_idnamedistanceratingcomputed_distance_scorescorescoreDetails
09Burger Bazaar34.285.00.992613{'value': 0.9926129841533635, 'description': '...
11Yummy Grub24.190.00.500000{'value': 0.5, 'description': 'the value calcu...
24Soup for Supper33.585.00.500000{'value': 0.5, 'description': 'the value calcu...
36Veggie Bites34.085.00.500000{'value': 0.5, 'description': 'the value calcu...
47Food Fiesta12.595.00.500000{'value': 0.5, 'description': 'the value calcu...
52Hao Chi Fan154.925.00.496304{'value': 0.49630422932785906, 'description': ...
68Pizza & Pie44.480.00.493936{'value': 0.49393578250786285, 'description': ...
73All Daysayuno54.375.00.493307{'value': 0.49330654108616756, 'description': ...
85Salada Grande64.270.00.492613{'value': 0.49261298415336346, 'description': ...
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "text/plain": [ + " _id name distance rating computed_distance_score score \\\n", + "0 9 Burger Bazaar 3 4.2 85.0 0.992613 \n", + "1 1 Yummy Grub 2 4.1 90.0 0.500000 \n", + "2 4 Soup for Supper 3 3.5 85.0 0.500000 \n", + "3 6 Veggie Bites 3 4.0 85.0 0.500000 \n", + "4 7 Food Fiesta 1 2.5 95.0 0.500000 \n", + "5 2 Hao Chi Fan 15 4.9 25.0 0.496304 \n", + "6 8 Pizza & Pie 4 4.4 80.0 0.493936 \n", + "7 3 All Daysayuno 5 4.3 75.0 0.493307 \n", + "8 5 Salada Grande 6 4.2 70.0 0.492613 \n", + "\n", + " scoreDetails \n", + "0 {'value': 0.9926129841533635, 'description': '... \n", + "1 {'value': 0.5, 'description': 'the value calcu... \n", + "2 {'value': 0.5, 'description': 'the value calcu... \n", + "3 {'value': 0.5, 'description': 'the value calcu... \n", + "4 {'value': 0.5, 'description': 'the value calcu... \n", + "5 {'value': 0.49630422932785906, 'description': ... \n", + "6 {'value': 0.49393578250786285, 'description': ... \n", + "7 {'value': 0.49330654108616756, 'description': ... \n", + "8 {'value': 0.49261298415336346, 'description': ... 
" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.DataFrame(rsf_results)" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [ + "kQ2aeJ-EoKPu", + "KCMZo5mGod0E" + ], + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} From cb6905b3f42fbe0a1333fded4cb56aa0527426bd Mon Sep 17 00:00:00 2001 From: Erik Hatcher Date: Thu, 20 Nov 2025 09:07:42 -0500 Subject: [PATCH 2/3] correct spelling --- notebooks/advanced_techniques/README.md | 2 +- notebooks/advanced_techniques/rrf_rsf_demo.ipynb | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/notebooks/advanced_techniques/README.md b/notebooks/advanced_techniques/README.md index 3c208dd..573bc87 100644 --- a/notebooks/advanced_techniques/README.md +++ b/notebooks/advanced_techniques/README.md @@ -12,4 +12,4 @@ Jupyter Notebooks that cover advanced techniques such as vector quantization, pa | Quantized Vector Ingestion | MongoDB Atlas, Cohere | [![View Notebook](https://img.shields.io/badge/view-notebook-orange?logo=jupyter)](https://github.com/mongodb-developer/GenAI-Showcase/blob/main/notebooks/advanced_techniques/quantized_vector_ingestion_with_cohere_and_mongodb.ipynb) | | Retrieval Strategies with LlamaIndex | MongoDB Atlas, LlamaIndex | [![View Notebook](https://img.shields.io/badge/view-notebook-orange?logo=jupyter)](https://github.com/mongodb-developer/GenAI-Showcase/blob/main/notebooks/advanced_techniques/retrieval_strategies_mongodb_llamaindex.ipynb) | | Retrieval Strategies with Together AI | MongoDB Atlas, LlamaIndex, Together AI | [![View Notebook](https://img.shields.io/badge/view-notebook-orange?logo=jupyter)](https://github.com/mongodb-developer/GenAI-Showcase/blob/main/notebooks/advanced_techniques/retrieval_strategies_mongodb_llamaindex_togetherai.ipynb) | -| Reciprocal Rank 
Fusion (RRF) and Relative Score Fusion (RSF) Example | MongoDB Atlas | [![View Notebook](https://img.shields.io/badge/view-notebook-orange?logo=jupyter)](https://github.com/mongodb-developer/GenAI-Showcase/blob/main/notebooks/advanced_techniques/rrf_rsf_demo.ipynb) | +| Reciprocal Rank Fusion (RRF) and Relative Score Fusion (RSF) Demonstration | MongoDB Atlas | [![View Notebook](https://img.shields.io/badge/view-notebook-orange?logo=jupyter)](https://github.com/mongodb-developer/GenAI-Showcase/blob/main/notebooks/advanced_techniques/rrf_rsf_demo.ipynb) | diff --git a/notebooks/advanced_techniques/rrf_rsf_demo.ipynb b/notebooks/advanced_techniques/rrf_rsf_demo.ipynb index 7010513..9548aa5 100644 --- a/notebooks/advanced_techniques/rrf_rsf_demo.ipynb +++ b/notebooks/advanced_techniques/rrf_rsf_demo.ipynb @@ -6,7 +6,7 @@ "id": "kQ2aeJ-EoKPu" }, "source": [ - "# Recipocal Rank Fusion (RRF) and Relative Score Fusion (RSF) Demostration\n", + "# Reciprocal Rank Fusion (RRF) and Relative Score Fusion (RSF) Demonstration\n", "\n", "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/mongodb-developer/GenAI-Showcase/blob/main/notebooks/advanced_techniques/rrf_rsf_demo.ipynb)\n", "\n", From 68406ee841f68d243898adb7a13d8be86443db67 Mon Sep 17 00:00:00 2001 From: Erik Hatcher Date: Thu, 20 Nov 2025 09:25:39 -0500 Subject: [PATCH 3/3] pre-commit fixes --- .../advanced_techniques/rrf_rsf_demo.ipynb | 432 +++++++----------- 1 file changed, 177 insertions(+), 255 deletions(-) diff --git a/notebooks/advanced_techniques/rrf_rsf_demo.ipynb b/notebooks/advanced_techniques/rrf_rsf_demo.ipynb index 9548aa5..d7eaf1d 100644 --- a/notebooks/advanced_techniques/rrf_rsf_demo.ipynb +++ b/notebooks/advanced_techniques/rrf_rsf_demo.ipynb @@ -2,37 +2,34 @@ "cells": [ { "cell_type": "markdown", - "metadata": { - "id": "kQ2aeJ-EoKPu" - }, + "metadata": {}, "source": [ "# Reciprocal Rank Fusion (RRF) and Relative Score Fusion (RSF) 
Demonstration\n", "\n", "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/mongodb-developer/GenAI-Showcase/blob/main/notebooks/advanced_techniques/rrf_rsf_demo.ipynb)\n", "\n", - "\n", - "You can view an article that explains concepts in this notebook:\n", - "\n", - "[![View Article](https://img.shields.io/badge/View%20Article-blue)](https://mdb.link/rrf_rsf_demo)" + "You can view an article that explains concepts in this notebook: [![View Article](https://img.shields.io/badge/View%20Article-blue)](https://mdb.link/rrf_rsf_demo)" ] }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "id": "kQ2aeJ-EoKPu" + }, "source": [ "# Setup" ] }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "collapsed": true, "id": "-zYwYS4dTAeI", - "outputId": "a57f35bb-28fa-4412-cd46-d66f6594878e" + "outputId": "2c50c8b2-61b6-4e56-e120-94ba9b9ceff5" }, "outputs": [ { @@ -44,9 +41,9 @@ "Collecting dnspython<3.0.0,>=1.16.0 (from pymongo)\n", " Downloading dnspython-2.8.0-py3-none-any.whl.metadata (5.7 kB)\n", "Downloading pymongo-4.15.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl (1.7 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m24.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m41.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading dnspython-2.8.0-py3-none-any.whl (331 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m331.1/331.1 kB\u001b[0m \u001b[31m20.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m331.1/331.1 kB\u001b[0m \u001b[31m18.6 
MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hInstalling collected packages: dnspython, pymongo\n", "Successfully installed dnspython-2.8.0 pymongo-4.15.4\n" ] @@ -54,25 +51,21 @@ ], "source": [ "!pip install pymongo\n", - "import pymongo\n", - "from pymongo import MongoClient\n", "\n", "import pandas as pd\n", - "\n", - "from google.colab import userdata\n", - "from google.colab import data_table\n", - "#?? data_table.enable_dataframe_formatter() # Enable interactive data table display (run this once per notebook)\n" + "import pymongo\n", + "from google.colab import userdata" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "7KQkoFy-Wh9r", - "outputId": "29b89dd0-9d34-4aa9-9648-a02d63cdcda2" + "outputId": "f97c858a-a9cf-4556-ae95-3f5ba9eec794" }, "outputs": [ { @@ -95,78 +88,36 @@ ], "source": [ "def get_mongo_client(mongo_uri):\n", - " \"\"\"Establish connection to MongoDB\"\"\"\n", - " try:\n", - " client = pymongo.MongoClient(mongo_uri, appname=\"devrel.blueprint.hybrid\")\n", - " print(\"Connection to MongoDB successful\")\n", - " return client\n", - " except pymongo.errors.ConnectionFailure as e:\n", - " print(f\"Connection failed: {e}\")\n", - " return None\n", + " \"\"\"Establish connection to MongoDB\"\"\"\n", + " try:\n", + " client = pymongo.MongoClient(mongo_uri, appname=\"devrel.blueprint.hybrid\")\n", + " print(\"Connection to MongoDB successful\")\n", + " return client\n", + " except pymongo.errors.ConnectionFailure as e:\n", + " print(f\"Connection failed: {e}\")\n", + " return None\n", + "\n", "\n", - "mongodb_uri = userdata.get('MONGODB_URI')\n", + "mongodb_uri = userdata.get(\"MONGODB_URI\")\n", "if not mongodb_uri:\n", - " print(\"MONGODB_URI not set in environment variables\")\n", + " print(\"MONGODB_URI not set in environment variables\")\n", "\n", "client = get_mongo_client(mongodb_uri)\n", "\n", - "collection = 
client['test']['simple_fusion']\n", + "collection = client[\"test\"][\"simple_fusion\"]\n", "collection.delete_many({})\n", "\n", - "data = [{\n", - " \"_id\": 1,\n", - " \"name\": \"Yummy Grub\",\n", - " \"distance\": 2,\n", - " \"rating\": 4.1\n", - "},\n", - "{\n", - " \"_id\": 2,\n", - " \"name\": \"Hao Chi Fan\",\n", - " \"distance\": 15,\n", - " \"rating\": 4.9\n", - "},\n", - "{\n", - " \"_id\": 3,\n", - " \"name\": \"All Daysayuno\",\n", - " \"distance\": 5,\n", - " \"rating\": 4.3\n", - "},\n", - "{\n", - " \"_id\": 4,\n", - " \"name\": \"Soup for Supper\",\n", - " \"distance\": 3,\n", - " \"rating\": 3.5\n", - "},\n", - "{\n", - " \"_id\": 5,\n", - " \"name\": \"Salada Grande\",\n", - " \"distance\": 6,\n", - " \"rating\": 4.2\n", - "},\n", - "{\n", - " \"_id\": 6,\n", - " \"name\": \"Veggie Bites\",\n", - " \"distance\": 3,\n", - " \"rating\": 4\n", - "},\n", - "{\n", - " \"_id\": 7,\n", - " \"name\": \"Food Fiesta\",\n", - " \"distance\": 1,\n", - " \"rating\": 2.5\n", - "},\n", - "{\n", - " \"_id\": 8,\n", - " \"name\": \"Pizza & Pie\",\n", - " \"distance\": 4,\n", - " \"rating\": 4.4\n", - "},\n", - "{\n", - " \"_id\": 9,\n", - " \"name\": \"Burger Bazaar\",\n", - " \"distance\": 3,\n", - " \"rating\": 4.2\n", - "}]\n", + "data = [\n", + " {\"_id\": 1, \"name\": \"Yummy Grub\", \"distance\": 2, \"rating\": 4.1},\n", + " {\"_id\": 2, \"name\": \"Hao Chi Fan\", \"distance\": 15, \"rating\": 4.9},\n", + " {\"_id\": 3, \"name\": \"All Daysayuno\", \"distance\": 5, \"rating\": 4.3},\n", + " {\"_id\": 4, \"name\": \"Soup for Supper\", \"distance\": 3, \"rating\": 3.5},\n", + " {\"_id\": 5, \"name\": \"Salada Grande\", \"distance\": 6, \"rating\": 4.2},\n", + " {\"_id\": 6, \"name\": \"Veggie Bites\", \"distance\": 3, \"rating\": 4},\n", + " {\"_id\": 7, \"name\": \"Food Fiesta\", \"distance\": 1, \"rating\": 2.5},\n", + " {\"_id\": 8, \"name\": \"Pizza & Pie\", \"distance\": 4, \"rating\": 4.4},\n", + " {\"_id\": 9, \"name\": \"Burger Bazaar\", 
\"distance\": 3, \"rating\": 4.2},\n", + "]\n", "\n", "collection.insert_many(data)" ] @@ -182,14 +133,14 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 331 }, "id": "m5brM_QqgM18", - "outputId": "87dc5228-4f3a-4b9e-f22d-17861099fef5" + "outputId": "a8a17d13-b092-4c6a-8711-0ebcad2f7261" }, "outputs": [ { @@ -200,7 +151,7 @@ }, "text/html": [ "\n", - "
\n", + "
\n", "
\n", "