Evaluation.summarize:v0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
import weave
import typing
from weave.flow.scorer import auto_summarize
from weave.flow.scorer import get_scorer_attributes
import numpy as np
@weave.op()
async def summarize(self, eval_table: typing.Union[weave.WeaveList, list]) -> dict:
    """Collapse per-row evaluation results into a single summary dict.

    Args:
        eval_table: Evaluation rows, either already a ``weave.WeaveList`` or a
            plain list, which is wrapped in one. The ``model_output`` and
            ``model_latency`` columns are always read; the ``scores`` column
            is read only when at least one scorer is configured.

    Returns:
        A dict whose keys are added in this order:

        * ``"model_output"`` -- the result of ``auto_summarize`` on the
          ``model_output`` column; present only when that result is truthy.
        * one key per scorer name -- the scorer's summarize function applied
          to that scorer's sub-column of ``scores``.
        * ``"model_latency"`` -- ``{"mean": <float>}`` for the
          ``model_latency`` column.
    """
    # Normalise the input so the column accessors below are always available.
    rows = (
        eval_table
        if isinstance(eval_table, weave.WeaveList)
        else weave.WeaveList(eval_table)
    )

    results: dict = {}

    output_stats = auto_summarize(rows.column("model_output"))
    if output_stats:
        results["model_output"] = output_stats

    # ``self.scorers`` may be unset/empty; treat that as "no scorers".
    for scorer in self.scorers or []:
        name, _, summarize_scores = get_scorer_attributes(scorer)
        per_scorer_column = rows.column("scores").column(name)
        results[name] = summarize_scores(per_scorer_column)  # type: ignore

    # Only the mean latency is reported here (no standard error).
    latencies = rows.column("model_latency")
    results["model_latency"] = {"mean": float(np.mean(latencies))}

    return results