diff --git a/notebooks/advanced_techniques/README.md b/notebooks/advanced_techniques/README.md index bb87f40..573bc87 100644 --- a/notebooks/advanced_techniques/README.md +++ b/notebooks/advanced_techniques/README.md @@ -12,3 +12,4 @@ Jupyter Notebooks that cover advanced techniques such as vector quantization, pa | Quantized Vector Ingestion | MongoDB Atlas, Cohere | [![View Notebook](https://img.shields.io/badge/view-notebook-orange?logo=jupyter)](https://github.com/mongodb-developer/GenAI-Showcase/blob/main/notebooks/advanced_techniques/quantized_vector_ingestion_with_cohere_and_mongodb.ipynb) | | Retrieval Strategies with LlamaIndex | MongoDB Atlas, LlamaIndex | [![View Notebook](https://img.shields.io/badge/view-notebook-orange?logo=jupyter)](https://github.com/mongodb-developer/GenAI-Showcase/blob/main/notebooks/advanced_techniques/retrieval_strategies_mongodb_llamaindex.ipynb) | | Retrieval Strategies with Together AI | MongoDB Atlas, LlamaIndex, Together AI | [![View Notebook](https://img.shields.io/badge/view-notebook-orange?logo=jupyter)](https://github.com/mongodb-developer/GenAI-Showcase/blob/main/notebooks/advanced_techniques/retrieval_strategies_mongodb_llamaindex_togetherai.ipynb) | +| Reciprocal Rank Fusion (RRF) and Relative Score Fusion (RSF) Demonstration | MongoDB Atlas | [![View Notebook](https://img.shields.io/badge/view-notebook-orange?logo=jupyter)](https://github.com/mongodb-developer/GenAI-Showcase/blob/main/notebooks/advanced_techniques/rrf_rsf_demo.ipynb) | diff --git a/notebooks/advanced_techniques/rrf_rsf_demo.ipynb b/notebooks/advanced_techniques/rrf_rsf_demo.ipynb new file mode 100644 index 0000000..d7eaf1d --- /dev/null +++ b/notebooks/advanced_techniques/rrf_rsf_demo.ipynb @@ -0,0 +1,2641 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Recipocal Rank Fusion (RRF) and Relative Score Fusion (RSF) Demonstration\n", + "\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/mongodb-developer/GenAI-Showcase/blob/main/notebooks/advanced_techniques/rrf_rsf_demo.ipynb)\n", + "\n", + "You can view an article that explains concepts in this notebook: [![View Article](https://img.shields.io/badge/View%20Article-blue)](https://mdb.link/rrf_rsf_demo)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kQ2aeJ-EoKPu" + }, + "source": [ + "# Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "collapsed": true, + "id": "-zYwYS4dTAeI", + "outputId": "2c50c8b2-61b6-4e56-e120-94ba9b9ceff5" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting pymongo\n", + " Downloading pymongo-4.15.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl.metadata (22 kB)\n", + "Collecting dnspython<3.0.0,>=1.16.0 (from pymongo)\n", + " Downloading dnspython-2.8.0-py3-none-any.whl.metadata (5.7 kB)\n", + "Downloading pymongo-4.15.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl (1.7 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m41.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading dnspython-2.8.0-py3-none-any.whl (331 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m331.1/331.1 kB\u001b[0m \u001b[31m18.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hInstalling collected packages: dnspython, pymongo\n", + "Successfully installed dnspython-2.8.0 pymongo-4.15.4\n" + ] + } + ], + "source": [ + "!pip install pymongo\n", + "\n", + "import pandas as pd\n", + "import pymongo\n", + "from google.colab import userdata" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "7KQkoFy-Wh9r", + "outputId": "f97c858a-a9cf-4556-ae95-3f5ba9eec794" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Connection to MongoDB successful\n" + ] + }, + { + "data": { + "text/plain": [ + "InsertManyResult([1, 2, 3, 4, 5, 6, 7, 8, 9], acknowledged=True)" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def get_mongo_client(mongo_uri):\n", + " \"\"\"Establish connection to MongoDB\"\"\"\n", + " try:\n", + " client = pymongo.MongoClient(mongo_uri, appname=\"devrel.blueprint.hybrid\")\n", + " print(\"Connection to MongoDB successful\")\n", + " return client\n", + " except pymongo.errors.ConnectionFailure as e:\n", + " print(f\"Connection failed: {e}\")\n", + " return None\n", + "\n", + "\n", + "mongodb_uri = userdata.get(\"MONGODB_URI\")\n", + "if not mongodb_uri:\n", + " print(\"MONGODB_URI not set in environment variables\")\n", + "\n", + "client = get_mongo_client(mongodb_uri)\n", + "\n", + "collection = client[\"test\"][\"simple_fusion\"]\n", + "collection.delete_many({})\n", + "\n", + "data = [\n", + " {\"_id\": 1, \"name\": \"Yummy Grub\", \"distance\": 2, \"rating\": 4.1},\n", + " {\"_id\": 2, \"name\": \"Hao Chi Fan\", \"distance\": 15, \"rating\": 4.9},\n", + " {\"_id\": 3, \"name\": \"All Daysayuno\", \"distance\": 5, \"rating\": 4.3},\n", + " {\"_id\": 4, \"name\": \"Soup for Supper\", \"distance\": 3, \"rating\": 3.5},\n", + " {\"_id\": 5, \"name\": \"Salada Grande\", \"distance\": 6, \"rating\": 4.2},\n", + " {\"_id\": 6, \"name\": \"Veggie Bites\", \"distance\": 3, \"rating\": 4},\n", + " {\"_id\": 7, \"name\": \"Food Fiesta\", \"distance\": 1, \"rating\": 2.5},\n", + " {\"_id\": 8, \"name\": \"Pizza & Pie\", \"distance\": 4, \"rating\": 4.4},\n", + " {\"_id\": 9, \"name\": \"Burger Bazaar\", \"distance\": 3, \"rating\": 4.2},\n", + "]\n", + "\n", + "collection.insert_many(data)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KCMZo5mGod0E" + }, + "source": [ + "# Example documents" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 331 + }, + "id": "m5brM_QqgM18", + "outputId": "a8a17d13-b092-4c6a-8711-0ebcad2f7261" + }, + "outputs": [ + { + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "summary": "{\n \"name\": \"pd\",\n \"rows\": 9,\n \"fields\": [\n {\n \"column\": \"_id\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2,\n \"min\": 1,\n \"max\": 9,\n \"num_unique_values\": 9,\n \"samples\": [\n 8,\n 2,\n 6\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"name\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 9,\n \"samples\": [\n \"Pizza & Pie\",\n \"Hao Chi Fan\",\n \"Veggie Bites\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"distance\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 4,\n \"min\": 1,\n \"max\": 15,\n \"num_unique_values\": 7,\n \"samples\": [\n 2,\n 15,\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"rating\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.675359986311827,\n \"min\": 2.5,\n \"max\": 4.9,\n \"num_unique_values\": 8,\n \"samples\": [\n 4.9,\n 4.0,\n 4.1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", + "type": "dataframe" + }, + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
_idnamedistancerating
01Yummy Grub24.1
12Hao Chi Fan154.9
23All Daysayuno54.3
34Soup for Supper33.5
45Salada Grande64.2
56Veggie Bites34.0
67Food Fiesta12.5
78Pizza & Pie44.4
89Burger Bazaar34.2
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "text/plain": [ + " _id name distance rating\n", + "0 1 Yummy Grub 2 4.1\n", + "1 2 Hao Chi Fan 15 4.9\n", + "2 3 All Daysayuno 5 4.3\n", + "3 4 Soup for Supper 3 3.5\n", + "4 5 Salada Grande 6 4.2\n", + "5 6 Veggie Bites 3 4.0\n", + "6 7 Food Fiesta 1 2.5\n", + "7 8 Pizza & Pie 4 4.4\n", + "8 9 Burger Bazaar 3 4.2" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "all_docs = collection.find({}).to_list()\n", + "pd.DataFrame(all_docs)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vvSYXtPxJ8H9" + }, + "source": [ + "## Top closest and top rated" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1QDzV1WmJ5NZ" + }, + "outputs": [], + "source": [ + "top_closest_ranked = [{\"$sort\": {\"distance\": 1, \"rating\": -1}}, {\"$limit\": 5}]\n", + "top_closest_results_ranked = collection.aggregate(top_closest_ranked).to_list()\n", + "\n", + "top_rated_ranked = [{\"$sort\": {\"rating\": -1, \"distance\": 1}}, {\"$limit\": 5}]\n", + "top_rated_results_ranked = collection.aggregate(top_rated_ranked).to_list()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "Sg30wL8MWjJD", + "outputId": "eade93a8-2eb1-4357-d9ec-5b82024b9cf2" + }, + "outputs": [ + { + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "summary": "{\n \"name\": \"pd\",\n \"rows\": 5,\n \"fields\": [\n {\n \"column\": \"_id\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3,\n \"min\": 1,\n \"max\": 9,\n \"num_unique_values\": 5,\n \"samples\": [\n 1,\n 4,\n 9\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"name\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"Yummy Grub\",\n \"Soup for Supper\",\n \"Burger Bazaar\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"distance\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 1,\n \"max\": 3,\n \"num_unique_values\": 3,\n \"samples\": [\n 1,\n 2,\n 3\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"rating\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.7021395872616784,\n \"min\": 2.5,\n \"max\": 4.2,\n \"num_unique_values\": 5,\n \"samples\": [\n 4.1,\n 3.5,\n 4.2\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", + "type": "dataframe" + }, + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
_idnamedistancerating
07Food Fiesta12.5
11Yummy Grub24.1
29Burger Bazaar34.2
36Veggie Bites34.0
44Soup for Supper33.5
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "text/plain": [ + " _id name distance rating\n", + "0 7 Food Fiesta 1 2.5\n", + "1 1 Yummy Grub 2 4.1\n", + "2 9 Burger Bazaar 3 4.2\n", + "3 6 Veggie Bites 3 4.0\n", + "4 4 Soup for Supper 3 3.5" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.DataFrame(top_closest_results_ranked)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "YVc6Pqv7A-89", + "outputId": "8327a6c0-a0d4-462d-f5f4-773b0ac8e9c7" + }, + "outputs": [ + { + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "summary": "{\n \"name\": \"pd\",\n \"rows\": 5,\n \"fields\": [\n {\n \"column\": \"_id\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3,\n \"min\": 2,\n \"max\": 9,\n \"num_unique_values\": 5,\n \"samples\": [\n 8,\n 5,\n 3\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"name\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"Pizza & Pie\",\n \"Salada Grande\",\n \"All Daysayuno\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"distance\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 4,\n \"min\": 3,\n \"max\": 15,\n \"num_unique_values\": 5,\n \"samples\": [\n 4,\n 6,\n 5\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"rating\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.29154759474226516,\n \"min\": 4.2,\n \"max\": 4.9,\n \"num_unique_values\": 4,\n \"samples\": [\n 4.4,\n 4.2,\n 4.9\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", + "type": "dataframe" + }, + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
_idnamedistancerating
02Hao Chi Fan154.9
18Pizza & Pie44.4
23All Daysayuno54.3
39Burger Bazaar34.2
45Salada Grande64.2
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "text/plain": [ + " _id name distance rating\n", + "0 2 Hao Chi Fan 15 4.9\n", + "1 8 Pizza & Pie 4 4.4\n", + "2 3 All Daysayuno 5 4.3\n", + "3 9 Burger Bazaar 3 4.2\n", + "4 5 Salada Grande 6 4.2" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.DataFrame(top_rated_results_ranked)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Hrg8SUFkooFE" + }, + "source": [ + "# RRF: Reciprocal Rank Fusion" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 331 + }, + "id": "JAeibXevWoBB", + "outputId": "47b21013-e150-4542-9d3b-85c0b21da0cb" + }, + "outputs": [ + { + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "summary": "{\n \"name\": \"pd\",\n \"rows\": 9,\n \"fields\": [\n {\n \"column\": \"_id\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2,\n \"min\": 1,\n \"max\": 9,\n \"num_unique_values\": 9,\n \"samples\": [\n 3,\n 7,\n 2\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"name\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 9,\n \"samples\": [\n \"All Daysayuno\",\n \"Food Fiesta\",\n \"Hao Chi Fan\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"distance\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 4,\n \"min\": 1,\n \"max\": 15,\n \"num_unique_values\": 7,\n \"samples\": [\n 3,\n 1,\n 5\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"rating\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.675359986311827,\n \"min\": 2.5,\n \"max\": 4.9,\n \"num_unique_values\": 8,\n \"samples\": [\n 2.5,\n 4.9,\n 4.2\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"score\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.17390919344251815,\n \"min\": 0.46153846153846156,\n \"max\": 1.0243055555555556,\n \"num_unique_values\": 9,\n \"samples\": [\n 0.47619047619047616,\n 0.5737704918032788,\n 0.49180327868852464\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"scoreDetails\",\n \"properties\": {\n \"dtype\": \"object\",\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", + "type": "dataframe" + }, + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
_idnamedistanceratingscorescoreDetails
09Burger Bazaar34.21.024306{'value': 1.0243055555555556, 'description': '...
17Food Fiesta12.50.573770{'value': 0.5737704918032788, 'description': '...
21Yummy Grub24.10.564516{'value': 0.564516129032258, 'description': 'v...
36Veggie Bites34.00.546875{'value': 0.546875, 'description': 'value outp...
44Soup for Supper33.50.538462{'value': 0.5384615384615385, 'description': '...
52Hao Chi Fan154.90.491803{'value': 0.49180327868852464, 'description': ...
68Pizza & Pie44.40.483871{'value': 0.4838709677419355, 'description': '...
73All Daysayuno54.30.476190{'value': 0.47619047619047616, 'description': ...
85Salada Grande64.20.461538{'value': 0.46153846153846156, 'description': ...
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "text/plain": [ + " _id name distance rating score \\\n", + "0 9 Burger Bazaar 3 4.2 1.024306 \n", + "1 7 Food Fiesta 1 2.5 0.573770 \n", + "2 1 Yummy Grub 2 4.1 0.564516 \n", + "3 6 Veggie Bites 3 4.0 0.546875 \n", + "4 4 Soup for Supper 3 3.5 0.538462 \n", + "5 2 Hao Chi Fan 15 4.9 0.491803 \n", + "6 8 Pizza & Pie 4 4.4 0.483871 \n", + "7 3 All Daysayuno 5 4.3 0.476190 \n", + "8 5 Salada Grande 6 4.2 0.461538 \n", + "\n", + " scoreDetails \n", + "0 {'value': 1.0243055555555556, 'description': '... \n", + "1 {'value': 0.5737704918032788, 'description': '... \n", + "2 {'value': 0.564516129032258, 'description': 'v... \n", + "3 {'value': 0.546875, 'description': 'value outp... \n", + "4 {'value': 0.5384615384615385, 'description': '... \n", + "5 {'value': 0.49180327868852464, 'description': ... \n", + "6 {'value': 0.4838709677419355, 'description': '... \n", + "7 {'value': 0.47619047619047616, 'description': ... \n", + "8 {'value': 0.46153846153846156, 'description': ... " + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rrf_results = collection.aggregate(\n", + " [\n", + " {\n", + " \"$rankFusion\": {\n", + " \"input\": {\n", + " \"pipelines\": {\n", + " \"distance_pipeline\": top_closest_ranked,\n", + " \"rating_pipeline\": top_rated_ranked,\n", + " }\n", + " },\n", + " \"combination\": {\n", + " \"weights\": {\"distance_pipeline\": 35, \"rating_pipeline\": 30}\n", + " },\n", + " \"scoreDetails\": True,\n", + " }\n", + " },\n", + " {\n", + " \"$addFields\": {\n", + " \"score\": {\"$meta\": \"score\"},\n", + " \"scoreDetails\": {\"$meta\": \"scoreDetails\"},\n", + " }\n", + " },\n", + " ]\n", + ").to_list()\n", + "\n", + "pd.DataFrame(rrf_results)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ilTogKseo_Td" + }, + "source": [ + "# Relative Score Fusion" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "RJ6LQ91Xdx40" + }, + "outputs": [], + "source": [ + "top_closest_scored = [\n", + " {\n", + " \"$score\": {\n", + " \"score\": {\"$subtract\": [100, {\"$multiply\": [5.0, \"$distance\"]}]},\n", + " \"normalization\": \"none\",\n", + " }\n", + " },\n", + " {\"$sort\": {\"score\": {\"$meta\": \"score\"}}},\n", + " {\"$limit\": 5},\n", + "]\n", + "top_closest_results_scored = collection.aggregate(top_closest_scored).to_list()\n", + "\n", + "top_rated_scored = [\n", + " {\n", + " \"$score\": {\n", + " \"score\": \"$rating\",\n", + " \"normalization\": \"none\",\n", + " }\n", + " },\n", + " {\"$sort\": {\"score\": {\"$meta\": \"score\"}}},\n", + " {\"$limit\": 5},\n", + "]\n", + "top_rated_results_scored = collection.aggregate(top_rated_scored).to_list()\n", + "\n", + "rsf_results = collection.aggregate(\n", + " [\n", + " {\n", + " \"$scoreFusion\": {\n", + " \"input\": {\n", + " \"pipelines\": {\n", + " \"distance_pipeline\": top_closest_scored,\n", + " \"rating_pipeline\": top_rated_scored,\n", + " },\n", + " \"normalization\": \"sigmoid\",\n", + " },\n", + " \"combination\": {\n", + " \"weights\": {\"distance_pipeline\": 1, \"rating_pipeline\": 1},\n", + " \"method\": \"avg\",\n", + " },\n", + " \"scoreDetails\": True,\n", + " }\n", + " },\n", + " {\n", + " \"$addFields\": {\n", + " \"computed_distance_score\": {\n", + " \"$subtract\": [100, {\"$multiply\": [5.0, \"$distance\"]}]\n", + " },\n", + " \"score\": {\"$meta\": \"score\"},\n", + " \"scoreDetails\": {\"$meta\": \"scoreDetails\"},\n", + " }\n", + " },\n", + " ]\n", + ").to_list()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "WRO1iIOXi1z5", + "outputId": "4c617215-586f-42f5-b76e-2875d49c4675" + }, + "outputs": [ + { + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "summary": "{\n \"name\": \"pd\",\n \"rows\": 5,\n \"fields\": [\n {\n \"column\": \"_id\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3,\n \"min\": 1,\n \"max\": 9,\n \"num_unique_values\": 5,\n \"samples\": [\n 1,\n 6,\n 9\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"name\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"Yummy Grub\",\n \"Veggie Bites\",\n \"Burger Bazaar\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"distance\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 1,\n \"max\": 3,\n \"num_unique_values\": 3,\n \"samples\": [\n 1,\n 2,\n 3\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"rating\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.7021395872616784,\n \"min\": 2.5,\n \"max\": 4.2,\n \"num_unique_values\": 5,\n \"samples\": [\n 4.1,\n 4.0,\n 4.2\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", + "type": "dataframe" + }, + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
_idnamedistancerating
07Food Fiesta12.5
11Yummy Grub24.1
29Burger Bazaar34.2
34Soup for Supper33.5
46Veggie Bites34.0
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "text/plain": [ + " _id name distance rating\n", + "0 7 Food Fiesta 1 2.5\n", + "1 1 Yummy Grub 2 4.1\n", + "2 9 Burger Bazaar 3 4.2\n", + "3 4 Soup for Supper 3 3.5\n", + "4 6 Veggie Bites 3 4.0" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.DataFrame(top_closest_results_scored)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "27SB0wztlUEy", + "outputId": "86876d0d-53f2-46b0-ab59-635405532183" + }, + "outputs": [ + { + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "summary": "{\n \"name\": \"pd\",\n \"rows\": 5,\n \"fields\": [\n {\n \"column\": \"_id\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3,\n \"min\": 2,\n \"max\": 9,\n \"num_unique_values\": 5,\n \"samples\": [\n 8,\n 5,\n 3\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"name\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"Pizza & Pie\",\n \"Salada Grande\",\n \"All Daysayuno\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"distance\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 4,\n \"min\": 3,\n \"max\": 15,\n \"num_unique_values\": 5,\n \"samples\": [\n 4,\n 6,\n 5\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"rating\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.29154759474226516,\n \"min\": 4.2,\n \"max\": 4.9,\n \"num_unique_values\": 4,\n \"samples\": [\n 4.4,\n 4.2,\n 4.9\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", + "type": "dataframe" + }, + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
_idnamedistancerating
02Hao Chi Fan154.9
18Pizza & Pie44.4
23All Daysayuno54.3
39Burger Bazaar34.2
45Salada Grande64.2
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "text/plain": [ + " _id name distance rating\n", + "0 2 Hao Chi Fan 15 4.9\n", + "1 8 Pizza & Pie 4 4.4\n", + "2 3 All Daysayuno 5 4.3\n", + "3 9 Burger Bazaar 3 4.2\n", + "4 5 Salada Grande 6 4.2" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.DataFrame(top_rated_results_scored)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 331 + }, + "id": "3KnywlRHmPOX", + "outputId": "809ec9c4-0cfa-47d3-8a86-a61f9442af4e" + }, + "outputs": [ + { + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "summary": "{\n \"name\": \"pd\",\n \"rows\": 9,\n \"fields\": [\n {\n \"column\": \"_id\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2,\n \"min\": 1,\n \"max\": 9,\n \"num_unique_values\": 9,\n \"samples\": [\n 3,\n 1,\n 2\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"name\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 9,\n \"samples\": [\n \"All Daysayuno\",\n \"Yummy Grub\",\n \"Hao Chi Fan\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"distance\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 4,\n \"min\": 1,\n \"max\": 15,\n \"num_unique_values\": 7,\n \"samples\": [\n 3,\n 2,\n 5\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"rating\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.675359986311827,\n \"min\": 2.5,\n \"max\": 4.9,\n \"num_unique_values\": 8,\n \"samples\": [\n 4.1,\n 4.9,\n 4.2\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"computed_distance_score\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 20.766559657295186,\n \"min\": 25.0,\n \"max\": 95.0,\n \"num_unique_values\": 7,\n \"samples\": [\n 85.0,\n 90.0,\n 75.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"score\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.16522747482214264,\n \"min\": 0.49261298415336346,\n \"max\": 0.9926129841533635,\n \"num_unique_values\": 6,\n \"samples\": [\n 0.9926129841533635,\n 0.5,\n 0.49261298415336346\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"scoreDetails\",\n \"properties\": {\n \"dtype\": \"object\",\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", + "type": "dataframe" + }, + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
_idnamedistanceratingcomputed_distance_scorescorescoreDetails
09Burger Bazaar34.285.00.992613{'value': 0.9926129841533635, 'description': '...
11Yummy Grub24.190.00.500000{'value': 0.5, 'description': 'the value calcu...
24Soup for Supper33.585.00.500000{'value': 0.5, 'description': 'the value calcu...
36Veggie Bites34.085.00.500000{'value': 0.5, 'description': 'the value calcu...
47Food Fiesta12.595.00.500000{'value': 0.5, 'description': 'the value calcu...
52Hao Chi Fan154.925.00.496304{'value': 0.49630422932785906, 'description': ...
68Pizza & Pie44.480.00.493936{'value': 0.49393578250786285, 'description': ...
73All Daysayuno54.375.00.493307{'value': 0.49330654108616756, 'description': ...
85Salada Grande64.270.00.492613{'value': 0.49261298415336346, 'description': ...
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "text/plain": [ + " _id name distance rating computed_distance_score score \\\n", + "0 9 Burger Bazaar 3 4.2 85.0 0.992613 \n", + "1 1 Yummy Grub 2 4.1 90.0 0.500000 \n", + "2 4 Soup for Supper 3 3.5 85.0 0.500000 \n", + "3 6 Veggie Bites 3 4.0 85.0 0.500000 \n", + "4 7 Food Fiesta 1 2.5 95.0 0.500000 \n", + "5 2 Hao Chi Fan 15 4.9 25.0 0.496304 \n", + "6 8 Pizza & Pie 4 4.4 80.0 0.493936 \n", + "7 3 All Daysayuno 5 4.3 75.0 0.493307 \n", + "8 5 Salada Grande 6 4.2 70.0 0.492613 \n", + "\n", + " scoreDetails \n", + "0 {'value': 0.9926129841533635, 'description': '... \n", + "1 {'value': 0.5, 'description': 'the value calcu... \n", + "2 {'value': 0.5, 'description': 'the value calcu... \n", + "3 {'value': 0.5, 'description': 'the value calcu... \n", + "4 {'value': 0.5, 'description': 'the value calcu... \n", + "5 {'value': 0.49630422932785906, 'description': ... \n", + "6 {'value': 0.49393578250786285, 'description': ... \n", + "7 {'value': 0.49330654108616756, 'description': ... \n", + "8 {'value': 0.49261298415336346, 'description': ... " + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.DataFrame(rsf_results)" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [ + "kQ2aeJ-EoKPu", + "KCMZo5mGod0E" + ], + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {} + } + } + }, + "nbformat": 4, + "nbformat_minor": 0 +}