11import argparse
22import json
33import random
4+ import time
45from collections import defaultdict
56from concurrent .futures import ThreadPoolExecutor , as_completed
67from datetime import datetime , timedelta
1112from app .config import (
1213 LITELLM_API_KEY ,
1314 LITELLM_ENDPOINT ,
14- LITELLM_MODEL_DEFAULT ,
15+ LITELLM_MODEL_GENERATION ,
1516)
17+ from app .contents .models import ContentDB
1618from app .database import get_session
17- from app .question_answer .models import ContentFeedbackDB , QueryDB , ResponseFeedbackDB
19+ from app .llm_call .utils import remove_json_markdown
20+ from app .question_answer .models import (
21+ ContentFeedbackDB ,
22+ QueryDB ,
23+ QueryResponseContentDB ,
24+ ResponseFeedbackDB ,
25+ )
1826from app .urgency_detection .models import UrgencyQueryDB
1927from app .users .models import UserDB
2028from app .utils import get_key_hash
2937
3038try :
3139 import requests # type: ignore
40+
3241except ImportError :
3342 print (
3443 "Please install requests library using `pip install requests` "
3948 (QueryDB , "query_datetime_utc" ),
4049 (ResponseFeedbackDB , "feedback_datetime_utc" ),
4150 (ContentFeedbackDB , "feedback_datetime_utc" ),
51+ (QueryResponseContentDB , "created_datetime_utc" ),
4252 (UrgencyQueryDB , "message_datetime_utc" ),
4353]
4454
@@ -106,47 +116,54 @@ def generate_feedback(question_text: str, faq_text: str, sentiment: str) -> dict
106116 """
107117
108118 response = completion (
109- model = LITELLM_MODEL_DEFAULT ,
119+ model = LITELLM_MODEL_GENERATION ,
110120 api_base = LITELLM_ENDPOINT ,
111121 api_key = LITELLM_API_KEY ,
112122 messages = [{"role" : "user" , "content" : prompt }],
113123 max_tokens = 100 ,
114124 temperature = 0.7 ,
115125 )
116126
117- # Extract the output from the response
118- feedback_output = response ["choices" ][0 ]["message" ]["content" ].strip ()
119- feedback_output = feedback_output .replace ("json" , "" )
120- feedback_output = feedback_output .replace ("\n " , "" ).strip ()
121-
122127 try :
128+ # Extract the output from the response
129+ feedback_output = response ["choices" ][0 ]["message" ]["content" ].strip ()
130+ feedback_output = remove_json_markdown (feedback_output )
123131 feedback_dict = json .loads (feedback_output )
124132 if isinstance (feedback_dict , dict ) and "output" in feedback_dict :
125-
126133 return feedback_dict
127134 else :
128135 raise ValueError ("Output is not in the correct format." )
129- except ( SyntaxError , ValueError ) as e :
136+ except Exception as e :
130137 print (f"Output is not in the correct format.{ e } " )
131138 return None
132139
133140
def save_single_row(endpoint: str, data: dict, retries: int = 2) -> dict | None:
    """
    Save a single row in the database by POSTing ``data`` as JSON to ``endpoint``.

    The request carries the module-level ``API_KEY`` as a bearer token. An
    attempt counts as failed when the request raises, the response has an
    error status (``raise_for_status``), or the body cannot be decoded as JSON.
    A failed attempt is retried up to ``retries`` more times, waiting
    1s, 2s, 4s, ... between consecutive attempts.

    Parameters
    ----------
    endpoint
        Full URL to POST to.
    data
        JSON-serialisable payload.
    retries
        Number of additional attempts after the first one fails. Values
        below 1 mean a single attempt with no retry.

    Returns
    -------
    dict | None
        The decoded JSON response, or None once every attempt has failed
        (the last error is printed in that case).
    """
    last_error: Exception | None = None
    total_attempts = max(retries, 0) + 1

    for attempt in range(total_attempts):
        if attempt:
            # Exponential back-off derived from the attempt index, so the
            # schedule is 1s, 2s, 4s, ... whatever `retries` value was passed.
            time.sleep(2 ** (attempt - 1))
        try:
            response = requests.post(
                endpoint,
                headers={
                    "accept": "application/json",
                    "Content-Type": "application/json",
                    "Authorization": f"Bearer {API_KEY}",
                },
                json=data,
                # NOTE(review): TLS certificate verification is disabled here;
                # presumably for local/self-signed hosts - confirm before
                # pointing this script at a production host.
                verify=False,
            )
            response.raise_for_status()
            return response.json()
        except Exception as e:
            # Deliberately broad: this is a best-effort data loader, a failed
            # row is reported and skipped rather than aborting the whole run.
            last_error = e

    print(f"Request failed after retries: {last_error}")
    return None
150167
151168
152169def process_search (_id : int , text : str ) -> tuple | None :
@@ -161,7 +178,7 @@ def process_search(_id: int, text: str) -> tuple | None:
161178 "generate_tts" : False ,
162179 }
163180 response = save_single_row (endpoint , data )
164- if "search_results" in response :
181+ if response and isinstance ( response , dict ) and "search_results" in response :
165182 return (
166183 _id ,
167184 response ["query_id" ],
@@ -215,7 +232,13 @@ def process_content_feedback(
215232 if is_off_topic and feedback_sentiment == "positive" :
216233 return None
217234 # randomly get a content from the search results to provide feedback on
218- content = search_results [str (random .randint (0 , 3 ))]
235+ content_num = str (random .randint (0 , 3 ))
236+ if not search_results or not isinstance (search_results , dict ):
237+ return None
238+ if content_num not in search_results :
239+ return None
240+
241+ content = search_results [content_num ]
219242
220243 # Get content text and use to generate feedback text using LLMs
221244 content_text = content ["title" ] + " " + content ["text" ]
@@ -253,19 +276,16 @@ def process_urgency_detection(_id: int, text: str) -> tuple | None:
253276 }
254277
255278 response = save_single_row (endpoint , data )
256- if "is_urgent" in response :
279+ if response and "is_urgent" in response :
257280 return (response ["is_urgent" ],)
258281 return None
259282
260283
261- def create_random_datetime_from_string (date_string : str ) -> datetime :
284+ def create_random_datetime_from_string (start_date : datetime ) -> datetime :
262285 """
263286 Create a random datetime from a date in the format "%d-%m-%y
264287 to today
265288 """
266- date_format = "%d-%m-%y"
267-
268- start_date = datetime .strptime (date_string , date_format )
269289
270290 time_difference = datetime .now () - start_date
271291 random_number_of_days = random .randint (0 , time_difference .days )
@@ -296,6 +316,7 @@ def update_date_of_records(models: list, random_dates: list, api_key: str) -> No
296316 # Create a dictionary to map the query_id to the random date
297317 date_map_dic = {queries [i ].query_id : random_dates [i ] for i in range (len (queries ))}
298318 for model in models :
319+ print (f"Updating the date of the records for { model [0 ].__name__ } ..." )
299320 session = next (get_session ())
300321
301322 rows = [c for c in session .query (model [0 ]).all () if c .user_id == user .user_id ]
@@ -312,12 +333,31 @@ def update_date_of_records(models: list, random_dates: list, api_key: str) -> No
312333 session .commit ()
313334
314335
def update_date_of_contents(date: datetime) -> None:
    """
    Stamp every content record with the same creation and update datetime.

    Loads all ``ContentDB`` rows, sets both ``created_datetime_utc`` and
    ``updated_datetime_utc`` to ``date``, and commits the change.
    """
    db_session = next(get_session())
    for record in db_session.query(ContentDB).all():
        # Same value for both timestamps (created first, then updated).
        record.created_datetime_utc = record.updated_datetime_utc = date
        db_session.merge(record)
    db_session.commit()
348+
315349if __name__ == "__main__" :
316350 HOST = args .host
317351 NB_WORKERS = int (args .nb_workers ) if args .nb_workers else 8
318352 API_KEY = args .api_key if args .api_key else ADMIN_API_KEY
319353
320- start_date = args .start_date if args .start_date else "01-08-23"
354+ date_string = args .start_date if args .start_date else "01-08-23"
355+ date_format = "%d-%m-%y"
356+ start_date = datetime .strptime (date_string , date_format )
357+ assert (
358+ start_date and start_date < datetime .now ()
359+ ), "Invalid start date. Please provide a valid start date."
360+
321361 path = args .csv
322362 df = pd .read_csv (path )
323363 saved_queries = defaultdict (list )
@@ -409,5 +449,8 @@ def update_date_of_records(models: list, random_dates: list, api_key: str) -> No
409449 ]
410450 print ("Updating the date of the records..." )
411451 update_date_of_records (MODELS , random_dates , API_KEY )
452+
453+ print ("Updating the date of the content records..." )
454+ update_date_of_contents (start_date )
412455 print ("All records dates updated successfully." )
413456 print ("All records added successfully." )
0 commit comments