BM25sRetriever.retrieve:v0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
import weave
import bm25s
from Stemmer import Stemmer
# Maps a full language name to its two-letter code; `retrieve` passes the
# looked-up code as the `stopwords` argument of `bm25s.tokenize`.
LANGUAGE_DICT = dict(
    english="en",
    french="fr",
    german="de",
)
@weave.op()
def retrieve(self, query: str, top_k: int = 2):
    """Tokenize ``query`` and run it against the wrapped retriever.

    Args:
        query: Text to tokenize and search with.
        top_k: Forwarded as ``k`` to ``self._retriever.retrieve``.

    Returns:
        A dict with the keys ``"results"`` and ``"scores"``, holding the two
        values returned by ``self._retriever.retrieve``.

    Raises:
        KeyError: If ``self.language`` is not a key of ``LANGUAGE_DICT``.
    """
    # Look up the stopword code first, then build the stemmer, so a bad
    # language name fails on the dictionary lookup before a stemmer is made.
    stopword_code = LANGUAGE_DICT[self.language]
    # A stemmer is only constructed when stemming has been enabled.
    stemmer = Stemmer(self.language) if self.use_stemmer else None

    tokens = bm25s.tokenize(query, stopwords=stopword_code, stemmer=stemmer)
    hits, hit_scores = self._retriever.retrieve(tokens, k=top_k)
    return {"results": hits, "scores": hit_scores}