Skip to content

Commit 3bde173

Browse files
committed
Add retry prompt for llm alignscore failure
1 parent 6306091 commit 3bde173

File tree

4 files changed

+30
-12
lines changed

4 files changed

+30
-12
lines changed

core_backend/app/llm_call/llm_prompts.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,11 @@ def get_prompt(cls) -> str:
177177
You are a helpful question-answering AI. You understand user question and answer their \
178178
question using the REFERENCE TEXT below.
179179
"""
180+
RETRY_PROMPT_SUFFIX = """
181+
If the response above is not aligned with the question, please rectify this by considering \
182+
the following reason(s) for misalignment: "{failure_reason}". Make necessary adjustments \
183+
to ensure the answer is aligned with the question.
184+
"""
180185
RAG_RESPONSE_PROMPT = (
181186
_RAG_PROFILE_PROMPT
182187
+ """
@@ -224,6 +229,7 @@ class RAG(BaseModel):
224229
answer: str
225230

226231
prompt: ClassVar[str] = RAG_RESPONSE_PROMPT
232+
retry_prompt: ClassVar[str] = RAG_RESPONSE_PROMPT + RETRY_PROMPT_SUFFIX
227233

228234

229235
class AlignmentScore(BaseModel):

core_backend/app/llm_call/llm_rag.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,14 @@ async def get_llm_rag_answer(
3737
"""
3838

3939
metadata = metadata or {}
40-
prompt = RAG.prompt.format(context=context, original_language=original_language)
40+
if "failure_reason" in metadata and metadata["failure_reason"]:
41+
prompt = RAG.retry_prompt.format(
42+
context=context,
43+
original_language=original_language,
44+
failure_reason=metadata["failure_reason"],
45+
)
46+
else:
47+
prompt = RAG.prompt.format(context=context, original_language=original_language)
4148

4249
result = await _ask_llm_async(
4350
user_message=question,

core_backend/app/llm_call/process_output.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ async def generate_llm_query_response(
5656
Only runs if the generate_llm_response flag is set to True.
5757
Requires "search_results" and "original_language" in the response.
5858
"""
59-
if isinstance(response, QueryResponseError):
59+
if isinstance(response, QueryResponseError) and not metadata["failure_reason"]:
6060
return response
6161

6262
if response.search_results is None:

core_backend/app/question_answer/routers.py

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,12 @@ async def search(
125125
query_refined=user_query_refined_template,
126126
response=response,
127127
)
128+
if is_unable_to_generate_response(response):
129+
failure_reason = response.debug_info["factual_consistency"]
130+
response = await retry_search(
131+
query_refined=user_query_refined_template, response=response
132+
)
133+
response.debug_info["past_failure"] = failure_reason
128134

129135
await save_query_response_to_db(user_query_db, response, asession)
130136
await increment_query_count(
@@ -230,7 +236,6 @@ async def voice_search(
230236
asession=asession,
231237
exclude_archived=True,
232238
)
233-
234239
if user_query.generate_llm_response:
235240
response = await get_generation_response(
236241
query_refined=user_query_refined_template,
@@ -343,18 +348,15 @@ def is_unable_to_generate_response(response: QueryResponse) -> bool:
343348
async def retry_search(
344349
query_refined: QueryRefined,
345350
response: QueryResponse | QueryResponseError,
346-
user_id: int,
347-
n_similar: int,
348-
asession: AsyncSession,
349-
exclude_archived: bool = True,
350351
) -> QueryResponse | QueryResponseError:
351352
"""
352-
Retry wrapper for search_base.
353+
Retry wrapper for get_generation_response.
353354
"""
354355

355-
return await search_base(
356-
query_refined, response, user_id, n_similar, asession, exclude_archived
357-
)
356+
metadata = query_refined.query_metadata
357+
metadata["failure_reason"] = response.debug_info["factual_consistency"]["reason"]
358+
query_refined.query_metadata = metadata
359+
return await get_generation_response(query_refined, response)
358360

359361

360362
@generate_tts__after
@@ -376,10 +378,13 @@ async def get_generation_response(
376378
query_id=response.query_id, user_id=query_refined.user_id
377379
)
378380

381+
metadata["failure_reason"] = query_refined.query_metadata.get(
382+
"failure_reason", None
383+
)
384+
379385
response = await generate_llm_query_response(
380386
query_refined=query_refined, response=response, metadata=metadata
381387
)
382-
383388
return response
384389

385390

0 commit comments

Comments (0)