DenseRetriever.search:v0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import weave
from scipy.spatial.distance import cdist
@weave.op()
def search(self, query, k=5):
    """
    Search the indexed data for the given query using cosine similarity.

    Args:
        query (str): The search query.
        k (int): The number of top results to return. Default is 5.

    Returns:
        list: A list of dictionaries containing the source, text, and score
            of the top-k results, ordered from most to least similar. The
            score is ``1 - cosine_distance``, i.e. the cosine similarity
            between the query embedding and the indexed embedding.
    """
    # The query is embedded as a one-element batch; cdist needs 2-D inputs.
    # NOTE(review): assumes self.vectorizer returns a (1, dim) array-like and
    # self.index is (n_docs, dim) -- confirm against the indexing code.
    query_embedding = self.vectorizer([query], input_type="search_query")

    # cdist yields one row of distances per query vector; [0] picks the row
    # belonging to the single query.
    cosine_distances = cdist(query_embedding, self.index, metric="cosine")[0]

    # argsort is ascending, so the smallest distances (best matches) come first.
    top_k_indices = cosine_distances.argsort()[:k]

    # Return the results: the docstring promises a list, so falling off the
    # end of the function (implicitly returning None) would break callers.
    return [
        {
            "source": self.data[idx]["metadata"]["source"],
            "text": self.data[idx]["cleaned_content"],
            "score": 1 - cosine_distances[idx],
        }
        for idx in top_k_indices
    ]