2020import glob
2121import os
2222import textwrap
23- from typing import List
2423
2524# pinecone integration
2625import pinecone
4443from pinecone_text .sparse import BM25Encoder
4544
4645# this project
47- from models .const import Credentials
46+ from models .const import Config , Credentials
4847
4948
5049###############################################################################
5150# initializations
5251###############################################################################
53- DEFAULT_MODEL_NAME = "text-davinci-003"
52+ DEFAULT_MODEL_NAME = Config . OPENAI_PROMPT_MODEL_NAME
5453pinecone .init (api_key = Credentials .PINECONE_API_KEY , environment = Credentials .PINECONE_ENVIRONMENT )
5554set_llm_cache (InMemoryCache ())
5655
5756
5857class TextSplitter :
5958 """
60- Custom text splitter that add metadata to the Document object
59+ Custom text splitter that adds metadata to the Document object
6160 which is required by PineconeHybridSearchRetriever.
6261 """
6362
64- # ...
65-
6663 def create_documents (self , texts ):
6764 """Create documents"""
6865 documents = []
@@ -74,16 +71,16 @@ def create_documents(self, texts):
7471
7572
7673class HybridSearchRetriever :
77- """Sales Support Model (SSM). """
74+    """Hybrid Search Retriever (OpenAI + Pinecone)."""
7875
7976 # prompting wrapper
8077 chat = ChatOpenAI (
8178 api_key = Credentials .OPENAI_API_KEY ,
8279 organization = Credentials .OPENAI_API_ORGANIZATION ,
83- cache = True ,
84- max_retries = 3 ,
85- model = "gpt-3.5-turbo" ,
86- temperature = 0.0 ,
80+ cache = Config . OPENAI_CHAT_CACHE ,
81+ max_retries = Config . OPENAI_CHAT_MAX_RETRIES ,
82+ model = Config . OPENAI_CHAT_MODEL_NAME ,
83+ temperature = Config . OPENAI_CHAT_TEMPERATURE ,
8784 )
8885
8986 # embeddings
@@ -112,22 +109,6 @@ def prompt_with_template(self, prompt: PromptTemplate, concept: str, model: str
112109 retval = llm (prompt .format (concept = concept ))
113110 return retval
114111
115- def fit_tf_idf_values (self , corpus : List [str ]):
116- """Fit TF-IDF values.
117- 1. Fit the BM25 encoder on the corpus
118- 2. Encode the corpus
119- 3. Store the encoded corpus in Pinecone
120- """
121- corpus = ["foo" , "bar" , "world" , "hello" ]
122-
123- # fit tf-idf values on your corpus
124- self .bm25_encoder .fit (corpus )
125-
126- # persist the values to a json file
127- self .bm25_encoder .dump ("bm25_values.json" )
128- self .bm25_encoder = BM25Encoder ().load ("bm25_values.json" )
129- self .bm25_encoder .fit (corpus )
130-
131112 def load (self , filepath : str ):
132113 """
133114 Embed PDF.
@@ -201,9 +182,9 @@ def rag(self, prompt: str):
201182 document_texts = [doc .page_content for doc in documents ]
202183 leader = textwrap .dedent (
203184 """\
204- You can assume that the following is true,
205- and you should attempt to incorporate these facts
206- in your response:
185+ \n \n You can assume that the following is true.
186+ You should attempt to incorporate these facts
187+ into your response:\n \n
207188 """
208189 )
209190
0 commit comments