1818
1919# document loading
2020import glob
21+ import logging
2122import os
2223import textwrap
2324
5253DEFAULT_MODEL_NAME = Config .OPENAI_PROMPT_MODEL_NAME
5354pinecone .init (api_key = Credentials .PINECONE_API_KEY , environment = Credentials .PINECONE_ENVIRONMENT )
5455set_llm_cache (InMemoryCache ())
56+ logging .basicConfig (level = logging .DEBUG if Config .DEBUG_MODE else logging .INFO )
5557
5658
5759class TextSplitter :
@@ -123,16 +125,16 @@ def load(self, filepath: str):
123125 https://docs.pinecone.io/docs/manage-indexes#selective-metadata-indexing
124126 """
125127 try :
126- print ("Deleting index..." )
128+ logging . debug ("Deleting index..." )
127129 pinecone .delete_index (Credentials .PINECONE_INDEX_NAME )
128130 except pinecone .exceptions .PineconeException :
129- print ("Index does not exist. Continuing..." )
131+ logging . debug ("Index does not exist. Continuing..." )
130132
131133 metadata_config = {
132134 "indexed" : ["lc_id" , "lc_type" ],
133135 "context" : ["lc_text" ],
134136 }
135- print ("Creating index. This may take a few minutes..." )
137+ logging . debug ("Creating index. This may take a few minutes..." )
136138 pinecone .create_index (
137139 Credentials .PINECONE_INDEX_NAME , dimension = 1536 , metric = "dotproduct" , metadata_config = metadata_config
138140 )
@@ -142,19 +144,19 @@ def load(self, filepath: str):
142144 for pdf_file in pdf_files :
143145 i += 1
144146 j = len (pdf_files )
145- print ( f "Loading PDF { i } of { j } : " , pdf_file )
147+ logging . debug ( "Loading PDF %s of %s: %s" , i , j , pdf_file )
146148 loader = PyPDFLoader (file_path = pdf_file )
147149 docs = loader .load ()
148150 k = 0
149151 for doc in docs :
150152 k += 1
151- print (k * "-" , end = "\r " )
153+ logging . debug (k * "-" )
152154 documents = self .text_splitter .create_documents ([doc .page_content ])
153155 document_texts = [doc .page_content for doc in documents ]
154156 embeddings = self .openai_embeddings .embed_documents (document_texts )
155157 self .vector_store .add_documents (documents = documents , embeddings = embeddings )
156158
157- print ("Finished loading PDFs" )
159+ logging . debug ("Finished loading PDFs" )
158160
159161 def rag (self , prompt : str ):
160162 """
@@ -176,7 +178,7 @@ def rag(self, prompt: str):
176178 embeddings = self .openai_embeddings , sparse_encoder = self .bm25_encoder , index = self .pinecone_index
177179 )
178180 documents = retriever .get_relevant_documents (query = prompt )
179- print ( f "Retrieved { len ( documents ) } related documents from Pinecone" )
181+ logging . debug ( "Retrieved %i related documents from Pinecone" , len ( documents ) )
180182
181183 # Extract the text from the documents
182184 document_texts = [doc .page_content for doc in documents ]
@@ -191,14 +193,14 @@ def rag(self, prompt: str):
191193 # Create a prompt that includes the document texts
192194 prompt_with_relevant_documents = f"{ prompt + leader } { '. ' .join (document_texts )} "
193195
194- print ( f "Prompt contains { len (prompt_with_relevant_documents .split ())} words" )
195- print ("Prompt:" , prompt_with_relevant_documents )
196+ logging . debug ( "Prompt contains %i words" , len (prompt_with_relevant_documents .split ()))
197+ logging . debug ("Prompt: %s" , prompt_with_relevant_documents )
196198
197199 # Get a response from the GPT-3.5-turbo model
198200 response = self .cached_chat_request (
199201 system_message = "You are a helpful assistant." , human_message = prompt_with_relevant_documents
200202 )
201203
202- print ("Response:" )
203- print ("------------------------------------------------------" )
204+ logging . debug ("Response:" )
205+ logging . debug ("------------------------------------------------------" )
204206 return response
0 commit comments