diff --git a/mistral-embed-getting-started/mistral_qdrant_getting_started.ipynb b/mistral-embed-getting-started/mistral_qdrant_getting_started.ipynb
new file mode 100644
index 0000000..5d0ef2a
--- /dev/null
+++ b/mistral-embed-getting-started/mistral_qdrant_getting_started.ipynb
@@ -0,0 +1,169 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!pip install qdrant-client mistralai -qq"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from mistralai.client import MistralClient\n",
+    "from qdrant_client import QdrantClient\n",
+    "from qdrant_client.http.models import PointStruct, VectorParams, Distance\n",
+    "collection_name = \"example_collection\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "MISTRAL_API_KEY = \"your_mistral_api_key\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "search_client = QdrantClient(\":memory:\")\n",
+    "mistral_client = MistralClient(api_key=MISTRAL_API_KEY)\n",
+    "texts = [\n",
+    "    \"Qdrant is the best vector search engine!\",\n",
+    "    \"Loved by Enterprises and everyone building for low latency, high performance, and scale.\",\n",
+    "]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "result = mistral_client.embeddings(\n",
+    "    model=\"mistral-embed\",\n",
+    "    input=texts,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "1024"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "len(result.data[0].embedding)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "points = [\n",
+    "    PointStruct(\n",
+    "        id=idx,\n",
+    "        vector=response.embedding,\n",
+    "        payload={\"text\": text},\n",
+    "    )\n",
+    "    for idx, (response, text) in enumerate(zip(result.data, texts))\n",
+    "]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "UpdateResult(operation_id=0, status=<UpdateStatus.COMPLETED: 'completed'>)"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "search_client.create_collection(collection_name, vectors_config=\n",
+    "    VectorParams(\n",
+    "        size=1024,\n",
+    "        distance=Distance.COSINE,\n",
+    "    )\n",
+    ")\n",
+    "search_client.upsert(collection_name, points)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[ScoredPoint(id=0, version=0, score=0.7706972129206147, payload={'text': 'Qdrant is the best vector search engine!'}, vector=None, shard_key=None),\n",
+       " ScoredPoint(id=1, version=0, score=0.6887654944454833, payload={'text': 'Loved by Enterprises and everyone building for low latency, high performance, and scale.'}, vector=None, shard_key=None)]"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "search_client.search(\n",
+    "    collection_name=collection_name,\n",
+    "    query_vector=mistral_client.embeddings(\n",
+    "        model=\"mistral-embed\", input=[\"What is the best to use for vector search scaling?\"]\n",
+    "    ).data[0].embedding,\n",
+    ")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "examples",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}