Skip to content

Commit 4010a57

Browse files
tylerhutcherson nabsabraham
authored and committed
Add Cohere vectorizer (#98)
Adding support for CohereTextVectorizer with embed and embed_many functionality. Contributed by @nabsabraham --------- Co-authored-by: nabsabraham <nabila.abraham@gmail.com>
1 parent bf88687 commit 4010a57

File tree

12 files changed

+354
-12
lines changed

12 files changed

+354
-12
lines changed

.github/workflows/run_tests.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ jobs:
5959
OPENAI_API_KEY: ${{ secrets.OPENAI_KEY }}
6060
GCP_LOCATION: ${{ secrets.GCP_LOCATION }}
6161
GCP_PROJECT_ID: ${{ secrets.GCP_PROJECT_ID }}
62+
COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }}
6263
run: |
6364
make test-cov
6465
@@ -73,6 +74,7 @@ jobs:
7374
OPENAI_API_KEY: ${{ secrets.OPENAI_KEY }}
7475
GCP_LOCATION: ${{ secrets.GCP_LOCATION }}
7576
GCP_PROJECT_ID: ${{ secrets.GCP_PROJECT_ID }}
77+
COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }}
7678
run: |
7779
cd docs/ && treon -v --exclude="./examples/openai_qna.ipynb"
7880

conftest.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,10 @@ def client():
2626
def openai_key():
2727
return os.getenv("OPENAI_API_KEY")
2828

29+
@pytest.fixture
30+
def cohere_key():
31+
return os.getenv("COHERE_API_KEY")
32+
2933
@pytest.fixture
3034
def gcp_location():
3135
return os.getenv("GCP_LOCATION")

docs/api/vectorizer.rst

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,3 +61,22 @@ VertexAITextVectorizer
6161
:inherited-members:
6262
:members:
6363

64+
65+
CohereTextVectorizer
66+
======================
67+
68+
.. _coheretextvectorizer_api:
69+
70+
.. currentmodule:: redisvl.vectorize.text.cohere
71+
72+
.. autosummary::
73+
74+
CohereTextVectorizer.__init__
75+
CohereTextVectorizer.embed
76+
CohereTextVectorizer.embed_many
77+
78+
.. autoclass:: CohereTextVectorizer
79+
:show-inheritance:
80+
:inherited-members:
81+
:members:
82+

docs/user_guide/vectorizers_04.ipynb

Lines changed: 72 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
"1. OpenAI\n",
1212
"2. HuggingFace\n",
1313
"3. Vertex AI\n",
14+
"4. Cohere\n",
1415
"\n",
1516
"Before running this notebook, be sure to\n",
1617
"1. Have installed ``redisvl`` and have that environment active for this notebook.\n",
@@ -27,7 +28,7 @@
2728
},
2829
{
2930
"cell_type": "code",
30-
"execution_count": 1,
31+
"execution_count": 2,
3132
"metadata": {},
3233
"outputs": [],
3334
"source": [
@@ -298,6 +299,76 @@
298299
"test[:10]"
299300
]
300301
},
302+
{
303+
"cell_type": "markdown",
304+
"metadata": {},
305+
"source": [
306+
"### Cohere\n",
307+
"\n",
308+
"[Cohere](https://dashboard.cohere.ai/) allows you to integrate language AI into your product. The `CohereTextVectorizer` makes it simple to use RedisVL with the embedding models at Cohere. For this, you will need to install the `cohere` package.\n",
309+
"\n",
310+
"```bash\n",
311+
"pip install cohere\n",
312+
"```"
313+
]
314+
},
315+
{
316+
"cell_type": "code",
317+
"execution_count": 2,
318+
"metadata": {},
319+
"outputs": [],
320+
"source": [
321+
"import getpass\n",
322+
"# setup the API Key\n",
323+
"api_key = os.environ.get(\"COHERE_API_KEY\") or getpass.getpass(\"Enter your Cohere API key: \")"
324+
]
325+
},
326+
{
327+
"cell_type": "markdown",
328+
"metadata": {},
329+
"source": [
330+
"\n",
331+
"Special attention needs to be paid to the `input_type` parameter for each `embed` call. For example, when embedding \n",
332+
"queries, you should set `input_type='search_query'`; when embedding documents, set `input_type='search_document'`. For\n",
333+
"more information, see the [Cohere embed API reference](https://docs.cohere.com/reference/embed)."
334+
]
335+
},
336+
{
337+
"cell_type": "code",
338+
"execution_count": 3,
339+
"metadata": {},
340+
"outputs": [
341+
{
342+
"name": "stdout",
343+
"output_type": "stream",
344+
"text": [
345+
"Vector dimensions: 1024\n",
346+
"[-0.010856628, -0.019683838, -0.0062179565, 0.003545761, -0.047943115, 0.0009365082, -0.005924225, 0.016174316, -0.03289795, 0.049194336]\n",
347+
"Vector dimensions: 1024\n",
348+
"[-0.010108948, -0.016693115, -0.0002310276, -0.022644043, -0.04147339, 0.0021324158, -0.033477783, -0.0005378723, -0.02619934, 0.058013916]\n"
349+
]
350+
}
351+
],
352+
"source": [
353+
"from redisvl.vectorize.text import CohereTextVectorizer\n",
354+
"\n",
355+
"# create a vectorizer\n",
356+
"co = CohereTextVectorizer(\n",
357+
" model=\"embed-english-v3.0\",\n",
358+
" api_config={\"api_key\": api_key},\n",
359+
")\n",
360+
"\n",
361+
"# embed a search query\n",
362+
"test = co.embed(\"This is a test sentence.\", input_type='search_query')\n",
363+
"print(\"Vector dimensions: \", len(test))\n",
364+
"print(test[:10])\n",
365+
"\n",
366+
"# embed a document\n",
367+
"test = co.embed(\"This is a test sentence.\", input_type='search_document')\n",
368+
"print(\"Vector dimensions: \", len(test))\n",
369+
"print(test[:10])"
370+
]
371+
},
301372
{
302373
"cell_type": "markdown",
303374
"metadata": {},

redisvl/vectorize/base.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ def embed_many(
2828
preprocess: Optional[Callable] = None,
2929
batch_size: int = 1000,
3030
as_buffer: bool = False,
31+
**kwargs,
3132
) -> List[List[float]]:
3233
raise NotImplementedError
3334

@@ -36,6 +37,7 @@ def embed(
3637
text: str,
3738
preprocess: Optional[Callable] = None,
3839
as_buffer: bool = False,
40+
**kwargs,
3941
) -> List[float]:
4042
raise NotImplementedError
4143

@@ -45,6 +47,7 @@ async def aembed_many(
4547
preprocess: Optional[Callable] = None,
4648
batch_size: int = 1000,
4749
as_buffer: bool = False,
50+
**kwargs,
4851
) -> List[List[float]]:
4952
raise NotImplementedError
5053

@@ -53,6 +56,7 @@ async def aembed(
5356
text: str,
5457
preprocess: Optional[Callable] = None,
5558
as_buffer: bool = False,
59+
**kwargs,
5660
) -> List[float]:
5761
raise NotImplementedError
5862

redisvl/vectorize/text/__init__.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
1+
from redisvl.vectorize.text.cohere import CohereTextVectorizer
12
from redisvl.vectorize.text.huggingface import HFTextVectorizer
23
from redisvl.vectorize.text.openai import OpenAITextVectorizer
34
from redisvl.vectorize.text.vertexai import VertexAITextVectorizer
45

5-
__all__ = ["OpenAITextVectorizer", "HFTextVectorizer", "VertexAITextVectorizer"]
6+
__all__ = [
7+
"OpenAITextVectorizer",
8+
"HFTextVectorizer",
9+
"VertexAITextVectorizer",
10+
"CohereTextVectorizer",
11+
]

0 commit comments

Comments
 (0)