diff --git a/README.md b/README.md
index b1ec196..9a29227 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
# Watsonx.ai RAG Application
-This application exposes APIs to help set up a RAG pipeline using either **Watson Discovery** or **watsonx Discovery** (Elasticsearch) as the document respository.
+This application exposes APIs to help set up a RAG pipeline using **watsonx Discovery** (Elasticsearch) as the document repository.
This README will guide you through the steps to deploy the project locally, on OpenShift or IBM Code Engine. Additionally, you will learn how to access the Swagger documentation once the project is deployed.
@@ -69,13 +69,24 @@ The specified model must be downloaded and deployed into the Elasticsearch insta
3. Click the `Try it out` button and customize your request body:
```
{
- "bucket_name": "rag-app-test",
- "es_index_name": "rag-llm-ingest-index",
- "es_pipeline_name": "rag-llm-ingest",
+ "GUID": "string",
+ "title": "string",
+ "URL": "string",
+ "content": "string",
+ "tags": [
+ "string"
+ ],
+ "updated_date": "2024-09-26T19:33:28.221Z",
+ "view_security_roles": [
+ "string"
+ ],
+ "es_index_name": "string",
+ "es_pipeline_name": "string",
"chunk_size": "512",
"chunk_overlap": "256",
"es_model_name": ".elser_model_2_linux-x86_64",
- "es_index_text_field": "body_content_field"
+ "es_model_text_field": "text_field",
+ "es_index_text_field": "content"
}
```
@@ -146,49 +157,6 @@ The `queryLLM` API queries a connected Elasticsearch database then sends the ret
NOTE: The `filters` tag allows you to narrow down which documents to search on. You can specify from fields available within the document metadata. Remove this element if you don't want to filter on metadata.
-#### queryWDLLM
-
-The `queryWDLLM` API queries a connected **Watson Discovery** project then sends the returned text into **watsonx.ai** using the designated LLM to return a natural language response.
-
-1. Authenticate the `queryWDLLM` api by clicking the lock button to the right. Enter the value you added for the `RAG_APP_API_KEY`.
-
-3. Click the `Try it out` button and customize your request body:
- ```
- {
- "question": "string",
- "project_id": "string",
- "collection_id": "string",
- "wd_version": "2020-08-30",
- "wd_return_params": [
- "Title",
- "Text"
- ],
- "llm_instructions": "[INST]<>You are a helpful, respectful, and honest assistant. Always answer as helpfully as possible, while being safe. Be brief in your answers. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don\\'''t know the answer to a question, please do not share false information. <>\nGenerate the next agent response by answering the question. You are provided several documents with titles. If the answer comes from different documents please mention all possibilities and use the tiles of documents to separate between topics or domains. Answer with no more than 150 words. If you cannot base your answer on the given document, please state that you do not have an answer.\n{context_str}<>\n\n{query_str} Answer with no more than 150 words. If you cannot base your answer on the given document, please state that you do not have an answer. [/INST]",
- "num_results": "5",
- "llm_params": {
- "model_id": "meta-llama/llama-3-70b-instruct",
- "inputs": [],
- "parameters": {
- "decoding_method": "greedy",
- "max_new_tokens": 500,
- "min_new_tokens": 1,
- "moderations": {
- "hap_input": "true",
- "hap_output": "true",
- "threshold": 0.75
- },
- "repetition_penalty": 1.1,
- "temperature": 0.7,
- "top_k": 50,
- "top_p": 1
- }
- },
- "wd_document_names": [
- "acme.pdf",
- "test.docx"
- ]
- }
- ```
### Test from cURL
@@ -213,8 +181,4 @@ curl --location '/queryLLM' \
"question": "",
}
```
-4. Hit the blue `SEND` button and wait for your result.
-
-## Connecting this application to watsonx Assistant
-
-You can connect watsonx Assistant to invoke the `queryLLM` or `queryWDLLM` APIs. See the steps [here.](./watsonx-assistant-setup/README.md)
+4. Hit the blue `SEND` button and wait for your result.
diff --git a/application/app.py b/application/app.py
index e84ac4e..b8e4496 100644
--- a/application/app.py
+++ b/application/app.py
@@ -3,6 +3,7 @@
import uvicorn
import sys
import time
+from datetime import datetime
from utils import CloudObjectStorageReader, CustomWatsonX, create_sparse_vector_query_with_model, create_sparse_vector_query_with_model_and_filter
from dotenv import load_dotenv
@@ -12,12 +13,14 @@
from fastapi.security.api_key import APIKeyHeader
from starlette.status import HTTP_403_FORBIDDEN
from fastapi.middleware.cors import CORSMiddleware
+from aiohttp import ClientSession
+import asyncio
# ElasticSearch
from elasticsearch import Elasticsearch, AsyncElasticsearch
# Vector Store / WatsonX connection
-from llama_index.core import VectorStoreIndex, StorageContext, PromptTemplate, Settings
+from llama_index.core import Document, VectorStoreIndex, StorageContext, PromptTemplate, Settings
from llama_index.core.node_parser import SentenceSplitter
from llama_index.vector_stores.elasticsearch import ElasticsearchStore
from llama_index.core.vector_stores.types import MetadataFilters, ExactMatchFilter, FilterOperator, MetadataFilter
@@ -35,11 +38,22 @@
from customTypes.ingestResponse import ingestResponse
from customTypes.queryLLMRequest import queryLLMRequest
from customTypes.queryLLMResponse import queryLLMResponse
-from customTypes.queryWDLLMRequest import queryWDLLMRequest
-from customTypes.queryWDLLMResponse import queryWDLLMResponse
app = FastAPI()
+session = None # Global session variable
+
+@app.on_event("startup")
+async def startup_event():
+ global session
+ session = ClientSession() # Create a new session
+
+@app.on_event("shutdown")
+async def shutdown_event():
+ global session
+ if session:
+ await session.close() # Properly close the session
+
# Set up CORS
origins = ["*"]
@@ -67,11 +81,6 @@
"wxdurl": os.environ.get("WXD_URL")
}
-wd_creds = {
- "apikey": os.environ.get("WD_API_KEY"),
- "wd_url": os.environ.get("WD_URL")
-}
-
# WML Creds
wml_credentials = {
"url": os.environ.get("WX_URL"),
@@ -111,7 +120,14 @@ def index():
@app.post("/ingestDocs")
async def ingestDocs(request: ingestRequest, api_key: str = Security(get_api_key))->ingestResponse:
- cos_bucket_name = request.bucket_name
+ GUID = request.GUID
+ title = request.title
+ URL = request.URL
+ content = request.content
+ content_type= request.content_type
+ tags = request.tags
+ updated_date = request.updated_date.strftime("%Y-%m-%dT%H:%M:%S") if isinstance(request.updated_date, datetime) else request.updated_date
+ view_security_roles = request.view_security_roles
chunk_size = request.chunk_size
chunk_overlap = request.chunk_overlap
es_index_name = request.es_index_name
@@ -122,21 +138,6 @@ async def ingestDocs(request: ingestRequest, api_key: str = Security(get_api_key
# TODO: Metadata to add to nodes, could be anything from the user, maybe a list?
#metadata_fields = request.metadata_fields
- # try:
- cos_reader = CloudObjectStorageReader(
- bucket_name = cos_bucket_name,
- credentials = {
- "apikey": cos_creds["cosIBMApiKeyId"],
- "service_instance_id": cos_creds["cosServiceInstanceId"]
- },
- hostname = cos_creds["cosEndpointURL"]
- )
-
- print(cos_reader.list_files())
-
- documents = await cos_reader.load_data()
- print(f"Total documents: {len(documents)}")
-
try:
async_es_client = AsyncElasticsearch(
wxd_creds["wxdurl"],
@@ -165,7 +166,31 @@ async def ingestDocs(request: ingestRequest, api_key: str = Security(get_api_key
text_field=es_index_text_field
)
+ splitter = SentenceSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
+
+ # Split the content into chunks
+ content_chunks = splitter.split_text(content)
+
try:
+ # Create documents for each chunk
+ documents = []
+ for i, chunk in enumerate(content_chunks):
+ documents.append(
+ Document(
+ text=chunk,
+ metadata={
+ "guid": GUID,
+ "title": title,
+ "content_type": content_type,
+ "url": URL,
+ "tags": tags,
+ "updated_date": updated_date,
+ "view_security_roles": view_security_roles,
+ "chunk_index": i
+ }
+ )
+ )
+
index = VectorStoreIndex.from_documents(
documents,
storage_context=StorageContext.from_defaults(vector_store=vector_store),
@@ -175,7 +200,7 @@ async def ingestDocs(request: ingestRequest, api_key: str = Security(get_api_key
except Exception as e:
return ingestResponse(response = json.dumps({"error": repr(e)}))
else:
- return ingestResponse(response="success: number of documents loaded " + str(len(documents)))
+ return ingestResponse(response="success: content loaded")
async def create_index(client, index_name, esIndexTextField, pipeline_name):
print("Creating the index...")
@@ -183,7 +208,15 @@ async def create_index(client, index_name, esIndexTextField, pipeline_name):
"mappings": {
"properties": {
"ml.tokens": {"type": "rank_features"},
- esIndexTextField: {"type": "text"}}
+ esIndexTextField: {"type": "text"},
+ "guid": {"type": "keyword"},
+ "title": {"type": "text"},
+ "content_type": {"type": "text"},
+ "url": {"type": "text"},
+ "tags": {"type": "keyword"},
+ "updated_date": {"type": "date"},
+ "view_security_roles": {"type": "text"},
+ }
},
"settings": {
"index.default_pipeline": pipeline_name,
@@ -215,8 +248,12 @@ async def create_inference_pipeline(client, pipeline_name, esIndexTextField, esM
"inference_config": {"text_expansion": {"results_field": "tokens"}},
}
},
- {"set": {"field": "file_name", "value": "{{metadata.file_name}}"}},
- {"set": {"field": "url", "value": "{{metadata.url}}"}},
+ {"set": {"field": "guid", "value": "{{metadata.guid}}"}},
+ {"set": {"field": "title", "value": "{{metadata.title}}"}},
+ {"set": {"field": "content_type", "value": "{{metadata.content_type}}"}},
+ {"set": {"field": "tags", "value": "{{metadata.tags}}"}},
+ {"set": {"field": "updated_date", "value": "{{metadata.updated_date}}"}},
+ {"set": {"field": "view_security_roles", "value": "{{metadata.view_security_roles}}"}},
],
"version": 1,
}
@@ -352,195 +389,6 @@ def get_custom_watsonx(model_id, additional_kwargs):
custom_watsonx_cache[cache_key] = custom_watsonx
return custom_watsonx
-@app.post("/queryWDLLM")
-def queryWDLLM(request: queryWDLLMRequest, api_key: str = Security(get_api_key))->queryWDLLMResponse:
- question = request.question
- num_results = request.num_results
- llm_params = request.llm_params
- wd_document_names= request.wd_document_names
- project_id = request.project_id
- collection_id = request.collection_id
- wd_version = request.wd_version
- wd_return_params = request.wd_return_params
- llm_instructions = request.llm_instructions
-
- # Sanity check for instructions
- if "{query_str}" not in llm_instructions or "{context_str}" not in llm_instructions:
- data_response = {
- "llm_response": "",
- "references": [{"error":"Please add {query_str} and {context_str} placeholders to the instructions."}]
- }
- return queryLLMResponse(**data_response)
-
- # Sanity check for Watson Discovery
- if not wd_creds["apikey"] or wd_creds["wd_url"] == "":
- data_response = {
- "llm_response": "",
- "references": [{"error":"Please update the environment variables for Watson Discovery: WD_API & WD_URL"}]
- }
- return queryLLMResponse(**data_response)
-
- authenticator = IAMAuthenticator(wd_creds["apikey"])
- discovery = DiscoveryV2(
- version=wd_version,
- authenticator=authenticator
- )
-
- discovery.set_service_url(wd_creds["wd_url"])
-
- generate_params = {
- GenParams.MIN_NEW_TOKENS: llm_params.parameters.min_new_tokens,
- GenParams.MAX_NEW_TOKENS: llm_params.parameters.max_new_tokens,
- GenParams.DECODING_METHOD: llm_params.parameters.decoding_method,
- GenParams.REPETITION_PENALTY: llm_params.parameters.repetition_penalty,
- GenParams.TEMPERATURE: llm_params.parameters.temperature,
- GenParams.TOP_K: llm_params.parameters.top_k,
- GenParams.TOP_P: llm_params.parameters.top_p
- }
-
- model = Model(
- model_id=llm_params.model_id,
- params=generate_params,
- credentials={
- "apikey": os.environ.get("IBM_CLOUD_API_KEY"),
- "url": os.environ.get("WX_URL")
- },
- project_id=os.environ.get("WX_PROJECT_ID")
- )
-
- results = []
- wd_contexts = []
-
- # Filter the documents if the user provides it.
- if wd_document_names:
- all_results = []
-
- listDocs = discovery.list_documents(
- project_id=project_id,
- collection_id=collection_id
- )
-
- data = listDocs.result
-
- doc_id_list = []
- # Get the document details for each document passed by the user
- for doc_id in data["documents"]:
- doc = discovery.get_document(
- project_id=project_id,
- collection_id=collection_id,
- document_id=doc_id['document_id']
- ).get_result()
-
- # Create an object containing the document name and its doc id
- for wd_document_name in wd_document_names:
- if doc["filename"] == wd_document_name:
- doc_id_list.append({'doc_name': wd_document_name, 'doc_id': doc_id['document_id']})
-
- # Sanity checking to make sure the provided documents are available.
- if not doc_id_list or len(doc_id_list) != len(wd_document_names):
- data_response = {
- "llm_response": "One or more documents are not found in the Watson Discovery Collection or Project",
- "references": [{"node":"not implemented"}]
- }
-
- return queryWDLLMResponse(**data_response)
-
- for doc in doc_id_list:
- # Query WD based on a specific document and the NLQ question
- # https://cloud.ibm.com/docs/discovery-data?topic=discovery-data-query-reference
- # Link above contains the operator :: from the filter below
- discovery_json = discovery.query(
- project_id=project_id,
- filter='document_id::' + str(doc["doc_id"]),
- return_=wd_return_params,
- natural_language_query=question,
- count=num_results
- ).get_result()
-
- all_results.append(discovery_json["results"])
-
- # Iterate over all of the filtered WD results and prepare the passages for prompting
- for results in all_results:
- for document in results:
- document_id = document['document_id']
- passages = document['document_passages']
- results = []
-
- # Find the document title by its ID
- document_title = None
- for item in doc_id_list:
- if item['doc_id'] == document_id:
- document_title = item['doc_name']
- break
-
- for item in passages:
- # Remove the and tags from the passage
- passage_text = item["passage_text"].replace("", "").replace("", "")
-
- # If document_title is available append it to the passage_text for context
- if document_title:
- passage_text = f"{document_title}: {passage_text}"
-
- results.append(passage_text)
-
- # Join all passages for a single document and append to wd_contexts
- wd_contexts.append("\n".join(results))
-
- # Do a general search without filters
- else:
- discovery_json = discovery.query(
- project_id=project_id,
- return_=wd_return_params,
- natural_language_query=question,
- count=num_results
- ).get_result()
-
- # Iterate over the WD results and prepare the passages for prompting
- for document in discovery_json["results"]:
- document_id = document['document_id']
- passages = document['document_passages']
- results = []
-
- # Find the document title by its ID
- document_title = None
- doc = discovery.get_document(
- project_id=project_id,
- collection_id=collection_id,
- document_id=document_id
- ).get_result()
- document_title = doc["filename"]
-
- for item in passages:
- # Remove the tags
- passage_text = item["passage_text"].replace("", "").replace("", "")
-
- # If document_title is available append it to the passage_text for context
- if document_title:
- passage_text = f"{document_title}: {passage_text}"
-
- results.append(passage_text)
- # Join all passages for a single document and append to wd_contexts
- wd_contexts.append("\n".join(results))
-
- prompt = get_custom_prompt(llm_instructions, wd_contexts, question)
-
- generated_response = model.generate(prompt=prompt)
- response=generated_response['results'][0]['generated_text']
-
- data_response = {
- "llm_response": response,
- "references": [{"node":"not implemented"}]
- }
-
- return queryWDLLMResponse(**data_response)
-
-def get_custom_prompt(llm_instructions, wd_contexts, query_str):#
- context_str = "\n".join(wd_contexts)
-
- # Replace the placeholders in llm_instructions with the actual query and context
- prompt = llm_instructions.replace("{query_str}", query_str).replace("{context_str}", context_str)
- return prompt
-
if __name__ == '__main__':
if 'uvicorn' not in sys.argv[0]:
uvicorn.run("app:app", host='0.0.0.0', port=4050, reload=True)
diff --git a/application/customTypes/ingestRequest.py b/application/customTypes/ingestRequest.py
index 8f9012b..1d7815a 100644
--- a/application/customTypes/ingestRequest.py
+++ b/application/customTypes/ingestRequest.py
@@ -1,14 +1,23 @@
from pydantic import BaseModel, Field
from typing import Optional, List
+from datetime import datetime
class ingestRequest(BaseModel):
- bucket_name: str = Field(title="COS Bucket Name", description="Name of your cloud object storage bucket.")
+ #bucket_name: str = Field(title="COS Bucket Name", description="Name of your cloud object storage bucket.")
+ GUID: str = Field(title="GUID", description="Unique identifier for the document.")
+ title: str = Field(None, title="Title", description="Title of the document.")
+ URL: str = Field(None, title="Document URL", description="URL where the document is hosted.")
+ content: str = Field(None, title="Content", description="Main body content of the document.")
+ content_type: str = Field(None, title="Content Type", description="Type of body content of the document.")
+ tags: List[str] = Field(default=[], title="Tags", description="List of tags associated with the document.")
+ updated_date: datetime = Field(None, title="Update Date", description="Date when the document was last updated.")
+ view_security_roles: List[str] = Field(default=[], title="View Security Roles", description="List of security roles")
es_index_name: str = Field(title="ElasticSearch Index Name", description="Name of the elasticsearch index you want to create.")
es_pipeline_name: str = Field(title="ElasticSearch Pipeline Name", description="Name of the elasticsearch pipeline you want to create.")
- chunk_size: Optional[str] = Field(default="512")
- chunk_overlap: Optional[str] = Field(default="256")
- es_model_name: Optional[str] = Field(default=".elser_model_2_linux-x86_64")
- es_model_text_field: Optional[str] = Field(default="text_field")
- es_index_text_field: Optional[str] = Field(default="body_content_field")
+ chunk_size: str = Field(default="512")
+ chunk_overlap: str = Field(default="256")
+ es_model_name: str = Field(default=".elser_model_2_linux-x86_64")
+ es_model_text_field: str = Field(default="text_field")
+ es_index_text_field: str = Field(default="content")
# TODO: Implement metadata
# metadata_fields: Optional[List[str]] = None
diff --git a/application/customTypes/queryLLMRequest.py b/application/customTypes/queryLLMRequest.py
index 59fae3e..0175dcb 100644
--- a/application/customTypes/queryLLMRequest.py
+++ b/application/customTypes/queryLLMRequest.py
@@ -39,7 +39,7 @@ class Config:
class queryLLMRequest(BaseModel):
question: str
es_index_name: str
- es_index_text_field: Optional[str] = Field(default="body_content_field")
+ es_index_text_field: Optional[str] = Field(default="content")
es_model_name: Optional[str] = Field(default=".elser_model_2_linux-x86_64")
es_model_text_field: Optional[str] = Field(default="ml.tokens")
llm_instructions: Optional[str] = Field(default="[INST]<>You are a helpful, respectful, and honest assistant. Always answer as helpfully as possible, while being safe. Be brief in your answers. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don\\'\''t know the answer to a question, please do not share false information. <>\nGenerate the next agent response by answering the question. You are provided several documents with titles. If the answer comes from different documents please mention all possibilities and use the tiles of documents to separate between topics or domains. Answer with no more than 150 words. If you cannot base your answer on the given document, please state that you do not have an answer.\n{context_str}<>\n\n{query_str} Answer with no more than 150 words. If you cannot base your answer on the given document, please state that you do not have an answer. [/INST]", title="LLM Instructions", description="Instructions for LLM")
@@ -47,7 +47,5 @@ class queryLLMRequest(BaseModel):
llm_params: Optional[LLMParams] = LLMParams()
filters: Optional[Dict[str, Any]] = Field(None,
example={
- "date": "2022-01-01",
- "file_name": "test.pdf"
})
diff --git a/application/customTypes/queryWDLLMRequest.py b/application/customTypes/queryWDLLMRequest.py
deleted file mode 100644
index 26d8938..0000000
--- a/application/customTypes/queryWDLLMRequest.py
+++ /dev/null
@@ -1,51 +0,0 @@
-from pydantic import BaseModel, Field
-from typing import Optional, Dict, Any, List
-
-class Moderations(BaseModel):
- hap_input: str = 'true'
- threshold: float = 0.75
- hap_output: str = 'true'
-
-class Parameters(BaseModel):
- decoding_method: str = "greedy"
- min_new_tokens: int = 1
- max_new_tokens: int = 500
- repetition_penalty: float = 1.1
- temperature: float = 0.7
- top_k: int = 50
- top_p: int = 1
- moderations: Moderations = Moderations()
-
- def dict(self, *args, **kwargs):
- """
- Override dict() method to return a dictionary representation
- """
- params_dict = super().dict(*args, **kwargs)
- params_dict['moderations'] = self.moderations.dict()
- return params_dict
-
-class LLMParams(BaseModel):
- model_id: str = "meta-llama/llama-3-70b-instruct"
- inputs: list = []
- parameters: Parameters = Parameters()
-
- # Resolves warning error with model_id:
- # Field "model_id" has conflict with protected namespace "model_".
- # You may be able to resolve this warning by setting `model_config['protected_namespaces'] = ()`.
- # warnings.warn(
- class Config:
- protected_namespaces = ()
-
-class queryWDLLMRequest(BaseModel):
- question: str
- project_id: str
- collection_id: str
- wd_version: Optional[str] = Field(default='2020-08-30')
- wd_return_params: Optional[List[str]] = Field(default=["Title", "Text"], description="Params to pull from WD. Defaults Title and Text.")
- llm_instructions: Optional[str] = Field(default="[INST]<>You are a helpful, respectful, and honest assistant. Always answer as helpfully as possible, while being safe. Be brief in your answers. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don\\'\''t know the answer to a question, please do not share false information. <>\nGenerate the next agent response by answering the question. You are provided several documents with titles. If the answer comes from different documents please mention all possibilities and use the tiles of documents to separate between topics or domains. Answer with no more than 150 words. If you cannot base your answer on the given document, please state that you do not have an answer.\n{context_str}<>\n\n{query_str} Answer with no more than 150 words. If you cannot base your answer on the given document, please state that you do not have an answer. [/INST]", title="LLM Instructions", description="Instructions for LLM")
- num_results: Optional[str] = Field(default="5")
- llm_params: Optional[LLMParams] = LLMParams()
- wd_document_names: Optional[List[str]] = Field(None,
- example=["acme.pdf", "test.docx"]
- )
-
diff --git a/application/customTypes/queryWDLLMResponse.py b/application/customTypes/queryWDLLMResponse.py
deleted file mode 100644
index 723578b..0000000
--- a/application/customTypes/queryWDLLMResponse.py
+++ /dev/null
@@ -1,6 +0,0 @@
-from pydantic import BaseModel
-from typing import List, Dict
-
-class queryWDLLMResponse(BaseModel):
- llm_response: str
- references: List[Dict]
diff --git a/application/env b/application/env
index 1e35c46..27fdcb4 100644
--- a/application/env
+++ b/application/env
@@ -22,7 +22,3 @@ WX_PROJECT_ID=""
WXD_URL=":"
WXD_USERNAME=""
WXD_PASSWORD=""
-
-# Watson Discovery connection, used by searchWDLLM
-WD_API_KEY=""
-WD_URL=""
diff --git a/application/utils.py b/application/utils.py
index b37eb2f..0f012bc 100644
--- a/application/utils.py
+++ b/application/utils.py
@@ -296,7 +296,7 @@ def __init__(
) -> None:
super().__init__(
credentials=credentials,
- model_id="meta-llama/llama-2-70b-chat",
+ model_id="meta-llama/llama-3-70b-instruct",
project_id=project_id,
space_id=space_id,
max_new_tokens=max_new_tokens,
diff --git a/openshift-setup/secrets.yaml b/openshift-setup/secrets.yaml
index eaabc65..7319226 100644
--- a/openshift-setup/secrets.yaml
+++ b/openshift-setup/secrets.yaml
@@ -15,5 +15,3 @@ stringData:
WXD_USERNAME:
WXD_PASSWORD:
WXD_URL:
- WD_API_KEY:
- WD_URL:
diff --git a/openshift-setup/snippet-deployment.yaml b/openshift-setup/snippet-deployment.yaml
index f30ce2c..80e0aa5 100644
--- a/openshift-setup/snippet-deployment.yaml
+++ b/openshift-setup/snippet-deployment.yaml
@@ -49,16 +49,6 @@
secretKeyRef:
name:
key: WXD_PASSWORD
- - name: WD_API_KEY
- valueFrom:
- secretKeyRef:
- name:
- key: WD_API_KEY
- - name: WD_API_KEY
- valueFrom:
- secretKeyRef:
- name:
- key: WD_API_KEY
securityContext:
capabilities:
drop: