compute_f1_score:v0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
import weave
from typing import List
from typing import Dict
from typing import Any
@weave.op
def compute_precision(
model_output: List[Dict[str, Any]], contexts: List[Dict[str, Any]]
) -> float:
"""
Calculate the Precision for a single query.
Args:
model_output (List[Dict[str, Any]]): The list of retrieved documents from the model.
Each dictionary contains:
- 'source': A unique identifier for the document.
- 'score': The relevance score of the document.
contexts (List[Dict[str, Any]]): A list of dictionaries representing the relevant contexts.
Each dictionary contains:
- 'source': A unique identifier for the relevant document.
Returns:
float: The Precision score for the given query.
Precision measures the proportion of retrieved documents that are relevant.
"""
relevant_sources = {
context["source"] for context in contexts if context["relevance"] != 0