|
7 | 7 | https://python.langchain.com/docs/integrations/retrievers/pinecone_hybrid_search |
8 | 8 | """ |
9 | 9 |
|
| 10 | +# document loading |
10 | 11 | import glob |
11 | 12 | import os |
12 | 13 | import textwrap |
13 | | -from typing import List # ClassVar |
| 14 | +from typing import List |
14 | 15 |
|
15 | 16 | # pinecone integration |
16 | 17 | import pinecone |
17 | 18 | from langchain.cache import InMemoryCache |
18 | | - |
19 | | -# prompting and chat |
20 | 19 | from langchain.chat_models import ChatOpenAI |
21 | | - |
22 | | -# document loading |
23 | 20 | from langchain.document_loaders import PyPDFLoader |
24 | 21 |
|
25 | 22 | # embedding |
26 | 23 | from langchain.embeddings import OpenAIEmbeddings |
27 | | - |
28 | | -# vector database |
29 | 24 | from langchain.globals import set_llm_cache |
| 25 | + |
| 26 | +# prompting and chat |
30 | 27 | from langchain.llms.openai import OpenAI |
31 | 28 | from langchain.prompts import PromptTemplate |
| 29 | + |
| 30 | +# hybrid search capability |
32 | 31 | from langchain.retrievers import PineconeHybridSearchRetriever |
33 | 32 | from langchain.schema import HumanMessage, SystemMessage |
34 | 33 | from langchain.text_splitter import Document |
|
39 | 38 | from models.const import Credentials |
40 | 39 |
|
41 | 40 |
|
42 | | -# from pydantic import BaseModel, ConfigDict, Field |
43 | | - |
44 | | - |
45 | 41 | ############################################################################### |
46 | 42 | # initializations |
47 | 43 | ############################################################################### |
@@ -107,11 +103,6 @@ def prompt_with_template(self, prompt: PromptTemplate, concept: str, model: str |
107 | 103 | retval = llm(prompt.format(concept=concept)) |
108 | 104 | return retval |
109 | 105 |
|
110 | | - def split_text(self, text: str) -> List[Document]: |
111 | | - """Split text. Leaving this here for now, since it exposes the return type.""" |
112 | | - retval = self.text_splitter.create_documents([text]) |
113 | | - return retval |
114 | | - |
115 | 106 | def fit_tf_idf_values(self, corpus: List[str]): |
116 | 107 | """Fit TF-IDF values. |
117 | 108 | 1. Fit the BM25 encoder on the corpus |
|
0 commit comments