@@ -20,7 +20,7 @@ async def search_all(
2020 Perform searches across multiple search types and aggregate the results.
2121 :param seed_data: A dictionary containing seed data with entity names.
2222 :param search_config: A dictionary specifying which data sources to use for searching.
23- :return: A dictionary with doc_hash as keys and search results as values.
23+ :return: A dictionary with data source names as keys and search results as values.
2424 """
2525
2626 results = {}
@@ -34,47 +34,19 @@ async def search_all(
3434 ** search_config .get ("uniprot_params" , {})
3535 )
3636
37- # Prepare search queries: map doc_hash to content
38- doc_queries = {}
39- for doc_hash , doc_data in seed_data .items ():
40- # Try to extract search query from different data types
41- query = None
42- if "content" in doc_data :
43- query = doc_data ["content" ]
44- elif doc_data .get ("type" ) == "protein" and "protein_caption" in doc_data :
45- # For protein type, try to use sequence, id, or protein_name
46- protein_caption = doc_data ["protein_caption" ]
47- if "sequence" in protein_caption and protein_caption ["sequence" ]:
48- query = protein_caption ["sequence" ]
49- elif "id" in protein_caption and protein_caption ["id" ]:
50- query = protein_caption ["id" ]
51- elif "protein_name" in protein_caption and protein_caption ["protein_name" ]:
52- query = protein_caption ["protein_name" ]
53-
54- if query :
55- if query not in doc_queries :
56- doc_queries [query ] = []
57- doc_queries [query ].append (doc_hash )
58-
59- # Get unique queries
60- unique_queries = list (doc_queries .keys ())
61-
62- # Perform searches
37+ data = list (seed_data .values ())
38+ data = [d ["content" ] for d in data if "content" in d ]
39+ data = list (set (data )) # Remove duplicates
6340 uniprot_results = await run_concurrent (
6441 uniprot_search_client .search ,
65- unique_queries ,
42+ data ,
6643 desc = "Searching UniProt database" ,
6744 unit = "keyword" ,
6845 )
69-
70- # Map results back to doc hashes
71- for query , result in zip (unique_queries , uniprot_results ):
72- for doc_hash in doc_queries [query ]:
73- if doc_hash not in results :
74- results [doc_hash ] = {}
75- results [doc_hash ][data_source ] = result
7646 else :
7747 logger .error ("Data source %s not supported." , data_source )
7848 continue
7949
50+ results [data_source ] = uniprot_results
51+
8052 return results
0 commit comments