Evaluation.evaluate:v0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
import weave
from typing import Union
from typing import Callable
from weave.flow.model import Model
import time
from weave.trace.errors import OpCallError
from rich import print
import traceback
import typing
from weave.flow.dataset import Dataset
import weave.flow.util as util
from weave.trace.env import get_weave_parallelism
from weave.flow.scorer import get_scorer_attributes
@weave.op()
async def evaluate(self, model: Union[Callable, Model]) -> dict:
eval_rows = []
start_time = time.time()
async def eval_example(example: dict) -> dict:
try:
eval_row = await self.predict_and_score(model, example)
except OpCallError as e:
raise e