@@ -104,15 +104,15 @@ async def read(self, read_config: Dict):
104104 # TODO: configurable whether to use coreference resolution
105105
106106 new_docs = {compute_mm_hash (doc , prefix = "doc-" ): doc for doc in data }
107- _add_doc_keys = await self .full_docs_storage .filter_keys (list (new_docs .keys ()))
107+ _add_doc_keys = self .full_docs_storage .filter_keys (list (new_docs .keys ()))
108108 new_docs = {k : v for k , v in new_docs .items () if k in _add_doc_keys }
109109
110110 if len (new_docs ) == 0 :
111111 logger .warning ("All documents are already in the storage" )
112112 return
113113
114- await self .full_docs_storage .upsert (new_docs )
115- await self .full_docs_storage .index_done_callback ()
114+ self .full_docs_storage .upsert (new_docs )
115+ self .full_docs_storage .index_done_callback ()
116116
117117 @op ("chunk" , deps = ["read" ])
118118 @async_to_sync_method
@@ -121,7 +121,7 @@ async def chunk(self, chunk_config: Dict):
121121 chunk documents into smaller pieces from full_docs_storage if not already present
122122 """
123123
124- new_docs = await self .meta_storage .get_new_data (self .full_docs_storage )
124+ new_docs = self .meta_storage .get_new_data (self .full_docs_storage )
125125 if len (new_docs ) == 0 :
126126 logger .warning ("All documents are already in the storage" )
127127 return
@@ -133,9 +133,7 @@ async def chunk(self, chunk_config: Dict):
133133 ** chunk_config ,
134134 )
135135
136- _add_chunk_keys = await self .chunks_storage .filter_keys (
137- list (inserting_chunks .keys ())
138- )
136+ _add_chunk_keys = self .chunks_storage .filter_keys (list (inserting_chunks .keys ()))
139137 inserting_chunks = {
140138 k : v for k , v in inserting_chunks .items () if k in _add_chunk_keys
141139 }
@@ -144,10 +142,10 @@ async def chunk(self, chunk_config: Dict):
144142 logger .warning ("All chunks are already in the storage" )
145143 return
146144
147- await self .chunks_storage .upsert (inserting_chunks )
148- await self .chunks_storage .index_done_callback ()
149- await self .meta_storage .mark_done (self .full_docs_storage )
150- await self .meta_storage .index_done_callback ()
145+ self .chunks_storage .upsert (inserting_chunks )
146+ self .chunks_storage .index_done_callback ()
147+ self .meta_storage .mark_done (self .full_docs_storage )
148+ self .meta_storage .index_done_callback ()
151149
152150 @op ("build_kg" , deps = ["chunk" ])
153151 @async_to_sync_method
@@ -156,7 +154,7 @@ async def build_kg(self):
156154 build knowledge graph from text chunks
157155 """
158156 # Step 1: get new chunks according to meta and chunks storage
159- inserting_chunks = await self .meta_storage .get_new_data (self .chunks_storage )
157+ inserting_chunks = self .meta_storage .get_new_data (self .chunks_storage )
160158 if len (inserting_chunks ) == 0 :
161159 logger .warning ("All chunks are already in the storage" )
162160 return
@@ -174,9 +172,9 @@ async def build_kg(self):
174172 return
175173
176174 # Step 3: mark meta
177- await self .graph_storage .index_done_callback ()
178- await self .meta_storage .mark_done (self .chunks_storage )
179- await self .meta_storage .index_done_callback ()
175+ self .graph_storage .index_done_callback ()
176+ self .meta_storage .mark_done (self .chunks_storage )
177+ self .meta_storage .index_done_callback ()
180178
181179 return _add_entities_and_relations
182180
@@ -185,7 +183,7 @@ async def build_kg(self):
185183 async def search (self , search_config : Dict ):
186184 logger .info ("[Search] %s ..." , ", " .join (search_config ["data_sources" ]))
187185
188- seeds = await self .meta_storage .get_new_data (self .full_docs_storage )
186+ seeds = self .meta_storage .get_new_data (self .full_docs_storage )
189187 if len (seeds ) == 0 :
190188 logger .warning ("All documents are already been searched" )
191189 return
@@ -194,19 +192,17 @@ async def search(self, search_config: Dict):
194192 search_config = search_config ,
195193 )
196194
197- _add_search_keys = await self .search_storage .filter_keys (
198- list (search_results .keys ())
199- )
195+ _add_search_keys = self .search_storage .filter_keys (list (search_results .keys ()))
200196 search_results = {
201197 k : v for k , v in search_results .items () if k in _add_search_keys
202198 }
203199 if len (search_results ) == 0 :
204200 logger .warning ("All search results are already in the storage" )
205201 return
206- await self .search_storage .upsert (search_results )
207- await self .search_storage .index_done_callback ()
208- await self .meta_storage .mark_done (self .full_docs_storage )
209- await self .meta_storage .index_done_callback ()
202+ self .search_storage .upsert (search_results )
203+ self .search_storage .index_done_callback ()
204+ self .meta_storage .mark_done (self .full_docs_storage )
205+ self .meta_storage .index_done_callback ()
210206
211207 @op ("quiz_and_judge" , deps = ["build_kg" ])
212208 @async_to_sync_method
@@ -240,8 +236,8 @@ async def quiz_and_judge(self, quiz_and_judge_config: Dict):
240236 progress_bar = self .progress_bar ,
241237 )
242238
243- await self .rephrase_storage .index_done_callback ()
244- await _update_relations .index_done_callback ()
239+ self .rephrase_storage .index_done_callback ()
240+ _update_relations .index_done_callback ()
245241
246242 logger .info ("Shutting down trainee LLM client." )
247243 self .trainee_llm_client .shutdown ()
@@ -258,7 +254,7 @@ async def partition(self, partition_config: Dict):
258254 self .tokenizer_instance ,
259255 partition_config ,
260256 )
261- await self .partition_storage .upsert (batches )
257+ self .partition_storage .upsert (batches )
262258 return batches
263259
264260 @op ("extract" , deps = ["chunk" ])
@@ -276,10 +272,10 @@ async def extract(self, extract_config: Dict):
276272 logger .warning ("No information extracted" )
277273 return
278274
279- await self .extract_storage .upsert (results )
280- await self .extract_storage .index_done_callback ()
281- await self .meta_storage .mark_done (self .chunks_storage )
282- await self .meta_storage .index_done_callback ()
275+ self .extract_storage .upsert (results )
276+ self .extract_storage .index_done_callback ()
277+ self .meta_storage .mark_done (self .chunks_storage )
278+ self .meta_storage .index_done_callback ()
283279
284280 @op ("generate" , deps = ["partition" ])
285281 @async_to_sync_method
@@ -303,17 +299,17 @@ async def generate(self, generate_config: Dict):
303299 return
304300
305301 # Step 3: store the generated QA pairs
306- await self .qa_storage .upsert (results )
307- await self .qa_storage .index_done_callback ()
302+ self .qa_storage .upsert (results )
303+ self .qa_storage .index_done_callback ()
308304
309305 @async_to_sync_method
310306 async def clear (self ):
311- await self .full_docs_storage .drop ()
312- await self .chunks_storage .drop ()
313- await self .search_storage .drop ()
314- await self .graph_storage .clear ()
315- await self .rephrase_storage .drop ()
316- await self .qa_storage .drop ()
307+ self .full_docs_storage .drop ()
308+ self .chunks_storage .drop ()
309+ self .search_storage .drop ()
310+ self .graph_storage .clear ()
311+ self .rephrase_storage .drop ()
312+ self .qa_storage .drop ()
317313
318314 logger .info ("All caches are cleared" )
319315
0 commit comments