# Evaluation.evaluate:v2
import weave
from typing import Union
from weave.trace.op import Op
from weave.flow.model import Model
from rich import print
from weave.trace.weave_client import Call
from datetime import datetime
from weave.flow.util import make_memorable_name
def default_evaluation_display_name(call: Call) -> str:
    """Build the display name used for an evaluation call.

    The result has the form ``eval-<YYYY-MM-DD>-<name>``, where the date
    comes from ``datetime.now()`` (naive, local time) and ``<name>`` is
    whatever ``make_memorable_name()`` returns.

    Args:
        call: The call being named. It is accepted to satisfy the
            ``call_display_name`` callback signature but is not read.

    Returns:
        The formatted display name.
    """
    date_stamp = datetime.now().strftime("%Y-%m-%d")
    memorable_suffix = make_memorable_name()
    return f"eval-{date_stamp}-{memorable_suffix}"
@weave.op(call_display_name=default_evaluation_display_name)
async def evaluate(self, model: Union[Op, Model]) -> dict:
    """Evaluate ``model`` and return the summary of its results.

    Awaits ``self.get_eval_results(model)``, passes the outcome to
    ``self.summarize``, prints the summary under the label
    "Evaluation summary", and returns it.

    Args:
        model: The ``Op`` or ``Model`` to evaluate; forwarded unchanged to
            ``self.get_eval_results``.

    Returns:
        Whatever ``self.summarize`` produced for the evaluation results.
    """
    results = await self.get_eval_results(model)
    summary = await self.summarize(results)
    # Echo the summary for the user before handing it back to the caller.
    print("Evaluation summary", summary)
    return summary