Skip to content

Commit 55516bf

Browse files
authored
Merge pull request #1606 from Agenta-AI/access-to-all-columns
Evaluators can access all columns
2 parents 2da4143 + 81423d3 commit 55516bf

File tree

24 files changed

+2398
-603
lines changed

24 files changed

+2398
-603
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,345 @@
1+
from datetime import datetime, timezone
2+
from typing import Any, Dict, List, Optional
3+
4+
from pydantic import BaseModel, Field
5+
from beanie import Document, Link, PydanticObjectId, iterative_migration
6+
7+
8+
class UserDB(Document):
    """User document, stored in the "users" collection.

    The timestamp fields use ``default_factory`` so each new instance gets the
    current UTC time. A plain ``default=datetime.now(...)`` is evaluated once,
    when the class body runs, and would give every document the same value.
    """

    uid: str = Field(default="0", unique=True, index=True)
    username: str = Field(default="agenta")
    # NOTE(review): this literal looks like an e-mail obfuscation placeholder
    # rather than a real address -- confirm the intended default.
    email: str = Field(default="[email protected]", unique=True)
    created_at: Optional[datetime] = Field(
        default_factory=lambda: datetime.now(timezone.utc)
    )
    updated_at: Optional[datetime] = Field(
        default_factory=lambda: datetime.now(timezone.utc)
    )

    class Settings:
        name = "users"
17+
18+
19+
class ImageDB(Document):
    """Defines the info needed to get an image and connect it to the app variant.

    Stored in the "docker_images" collection. Timestamps are produced per
    instance via ``default_factory`` instead of being frozen at import time.
    """

    type: Optional[str] = Field(default="image")
    template_uri: Optional[str]
    docker_id: Optional[str] = Field(index=True)
    tags: Optional[str]
    deletable: bool = Field(default=True)
    user: Link[UserDB]
    created_at: Optional[datetime] = Field(
        default_factory=lambda: datetime.now(timezone.utc)
    )
    updated_at: Optional[datetime] = Field(
        default_factory=lambda: datetime.now(timezone.utc)
    )

    class Settings:
        name = "docker_images"
33+
34+
35+
class AppDB(Document):
    """Application document, stored in the "app_db" collection.

    Timestamps are generated per instance (``default_factory``); a bare
    ``default=datetime.now(...)`` would be computed only once at import.
    """

    app_name: str
    user: Link[UserDB]
    created_at: Optional[datetime] = Field(
        default_factory=lambda: datetime.now(timezone.utc)
    )
    updated_at: Optional[datetime] = Field(
        default_factory=lambda: datetime.now(timezone.utc)
    )

    class Settings:
        name = "app_db"
43+
44+
45+
class DeploymentDB(Document):
    """Deployment document, stored in the "deployments" collection.

    Links a deployment to its app and user and records container details.
    Timestamps are generated per instance (``default_factory``) rather than
    once at import time.
    """

    app: Link[AppDB]
    user: Link[UserDB]
    container_name: Optional[str]
    container_id: Optional[str]
    uri: Optional[str]
    status: str
    created_at: Optional[datetime] = Field(
        default_factory=lambda: datetime.now(timezone.utc)
    )
    updated_at: Optional[datetime] = Field(
        default_factory=lambda: datetime.now(timezone.utc)
    )

    class Settings:
        name = "deployments"
57+
58+
59+
class VariantBaseDB(Document):
    """Variant base document, stored in the "bases" collection.

    Timestamps are generated per instance (``default_factory``) rather than
    once at import time.
    """

    app: Link[AppDB]
    user: Link[UserDB]
    base_name: str
    image: Link[ImageDB]
    deployment: Optional[PydanticObjectId]  # Link to deployment
    created_at: Optional[datetime] = Field(
        default_factory=lambda: datetime.now(timezone.utc)
    )
    updated_at: Optional[datetime] = Field(
        default_factory=lambda: datetime.now(timezone.utc)
    )

    class Settings:
        name = "bases"
70+
71+
72+
class ConfigDB(BaseModel):
    # Embedded model: a named configuration plus its parameter mapping.
    # `parameters` falls back to a fresh empty dict when not supplied.
    config_name: str
    parameters: Dict[str, Any] = Field(default_factory=dict)
75+
76+
77+
class AppVariantDB(Document):
    """App variant document, stored in the "app_variants" collection.

    Fixes relative to the previous definition:

    * ``parameters`` used ``Field(default=dict)``, which makes the default the
      ``dict`` *type* itself; it now uses ``default_factory=dict`` so the
      default is a fresh empty dictionary.
    * Timestamps use ``default_factory`` so they are computed per instance
      instead of once at import time.
    """

    app: Link[AppDB]
    variant_name: str
    revision: int
    image: Link[ImageDB]
    user: Link[UserDB]
    modified_by: Link[UserDB]
    parameters: Dict[str, Any] = Field(
        default_factory=dict
    )  # TODO: deprecated. remove
    previous_variant_name: Optional[str]  # TODO: deprecated. remove
    base_name: Optional[str]
    base: Link[VariantBaseDB]
    config_name: Optional[str]
    config: ConfigDB
    created_at: Optional[datetime] = Field(
        default_factory=lambda: datetime.now(timezone.utc)
    )
    updated_at: Optional[datetime] = Field(
        default_factory=lambda: datetime.now(timezone.utc)
    )

    is_deleted: bool = Field(  # TODO: deprecated. remove
        default=False
    )  # soft deletion for using the template variants

    class Settings:
        name = "app_variants"
99+
100+
101+
class AppVariantRevisionsDB(Document):
    """Revision of an app variant, stored in "app_variant_revisions".

    ``created_at`` is required; ``updated_at`` defaults to the current UTC time
    computed per instance (``default_factory``) rather than once at import.
    """

    variant: Link[AppVariantDB]
    revision: int
    modified_by: Link[UserDB]
    base: Link[VariantBaseDB]
    config: ConfigDB
    created_at: datetime
    updated_at: Optional[datetime] = Field(
        default_factory=lambda: datetime.now(timezone.utc)
    )

    class Settings:
        name = "app_variant_revisions"
112+
113+
114+
class AppEnvironmentDB(Document):
    """App environment document, stored in the "environments" collection.

    ``created_at`` is computed per instance (``default_factory``) rather than
    once at import time.
    """

    app: Link[AppDB]
    name: str
    user: Link[UserDB]
    revision: int
    deployed_app_variant: Optional[PydanticObjectId]
    deployed_app_variant_revision: Optional[Link[AppVariantRevisionsDB]]
    deployment: Optional[PydanticObjectId]  # reference to deployment
    created_at: Optional[datetime] = Field(
        default_factory=lambda: datetime.now(timezone.utc)
    )

    class Settings:
        name = "environments"
126+
127+
128+
class AppEnvironmentRevisionDB(Document):
    # One revision entry of an app environment; persisted in the
    # "environments_revisions" collection. `created_at` has no default and
    # must be provided by the caller.
    environment: Link[AppEnvironmentDB]
    revision: int
    modified_by: Link[UserDB]
    deployed_app_variant_revision: Optional[PydanticObjectId]
    deployment: Optional[PydanticObjectId]  # reference to deployment
    created_at: datetime

    class Settings:
        name = "environments_revisions"
138+
139+
140+
class TemplateDB(Document):
    # Template metadata; persisted in the "templates" collection.
    type: Optional[str] = Field(default="image")
    template_uri: Optional[str]
    tag_id: Optional[int]
    name: str = Field(unique=True)  # tag name of image
    repo_name: Optional[str]
    title: str
    description: str
    size: Optional[int]
    digest: Optional[str]  # sha256 hash of image digest
    last_pushed: Optional[datetime]

    class Settings:
        name = "templates"
154+
155+
156+
class TestSetDB(Document):
    """Test set document, stored in the "testsets" collection.

    ``csvdata`` holds the rows as a list of string-to-string mappings.
    Timestamps are computed per instance (``default_factory``) rather than
    once at import time.
    """

    name: str
    app: Link[AppDB]
    csvdata: List[Dict[str, str]]
    user: Link[UserDB]
    created_at: Optional[datetime] = Field(
        default_factory=lambda: datetime.now(timezone.utc)
    )
    updated_at: Optional[datetime] = Field(
        default_factory=lambda: datetime.now(timezone.utc)
    )

    class Settings:
        name = "testsets"
166+
167+
168+
class EvaluatorConfigDB(Document):
    """Evaluator configuration, stored in the "evaluators_configs" collection.

    Fixes relative to the previous definition:

    * ``settings_values`` used ``Field(default=dict)``, which makes the default
      the ``dict`` *type* itself; it now uses ``default_factory=dict`` so the
      default is a fresh empty dictionary.
    * Timestamps use ``default_factory`` so they are computed per instance
      instead of once at import time.
    """

    app: Link[AppDB]
    user: Link[UserDB]
    name: str
    evaluator_key: str
    settings_values: Dict[str, Any] = Field(default_factory=dict)
    created_at: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc)
    )
    updated_at: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc)
    )

    class Settings:
        name = "evaluators_configs"
179+
180+
181+
class Error(BaseModel):
    # Error payload: a required message and an optional stack trace string.
    message: str
    stacktrace: Optional[str] = None
184+
185+
186+
class Result(BaseModel):
    # Typed result container: `type` is required, while `value` and `error`
    # are both optional and default to None.
    type: str
    value: Optional[Any] = None
    error: Optional[Error] = None
190+
191+
192+
class InvokationResult(BaseModel):
    # A Result paired with optional cost and latency figures.
    result: Result
    cost: Optional[float] = None
    latency: Optional[float] = None
196+
197+
198+
class EvaluationScenarioResult(BaseModel):
    # Associates an evaluator config (by object id) with its Result.
    evaluator_config: PydanticObjectId
    result: Result
201+
202+
203+
class AggregatedResult(BaseModel):
    # Associates an evaluator config (by object id) with a Result; used as the
    # element type of EvaluationDB.aggregated_results.
    evaluator_config: PydanticObjectId
    result: Result
206+
207+
208+
class EvaluationScenarioInputDB(BaseModel):
    # One scenario input: its name, a type tag, and its value (all strings).
    name: str
    type: str
    value: str
212+
213+
214+
class EvaluationScenarioOutputDB(BaseModel):
    # One scenario output: a Result with optional cost and latency figures.
    result: Result
    cost: Optional[float] = None
    latency: Optional[float] = None
218+
219+
220+
class HumanEvaluationScenarioInput(BaseModel):
    # Name/value pair for one input of a human evaluation scenario.
    input_name: str
    input_value: str
223+
224+
225+
class HumanEvaluationScenarioOutput(BaseModel):
    # Pairs a variant id (as a string) with that variant's output text.
    variant_id: str
    variant_output: str
228+
229+
230+
class HumanEvaluationDB(Document):
    """Human evaluation document, stored in "human_evaluations".

    Timestamps are computed per instance (``default_factory``) rather than
    once at import time.
    """

    app: Link[AppDB]
    user: Link[UserDB]
    status: str
    evaluation_type: str
    variants: List[PydanticObjectId]
    variants_revisions: List[PydanticObjectId]
    testset: Link[TestSetDB]
    created_at: Optional[datetime] = Field(
        default_factory=lambda: datetime.now(timezone.utc)
    )
    updated_at: Optional[datetime] = Field(
        default_factory=lambda: datetime.now(timezone.utc)
    )

    class Settings:
        name = "human_evaluations"
243+
244+
245+
class HumanEvaluationScenarioDB(Document):
    """Human evaluation scenario, stored in "human_evaluations_scenarios".

    Timestamps are computed per instance (``default_factory``) rather than
    once at import time.
    """

    user: Link[UserDB]
    evaluation: Link[HumanEvaluationDB]
    inputs: List[HumanEvaluationScenarioInput]
    outputs: List[HumanEvaluationScenarioOutput]
    vote: Optional[str]
    score: Optional[Any]
    correct_answer: Optional[str]
    created_at: Optional[datetime] = Field(
        default_factory=lambda: datetime.now(timezone.utc)
    )
    updated_at: Optional[datetime] = Field(
        default_factory=lambda: datetime.now(timezone.utc)
    )
    is_pinned: Optional[bool]
    note: Optional[str]

    class Settings:
        name = "human_evaluations_scenarios"
260+
261+
262+
class EvaluationDB(Document):
    """Evaluation document, stored in the "new_evaluations" collection.

    Timestamps are computed per instance (``default_factory``) rather than
    once at import time.
    """

    app: Link[AppDB]
    user: Link[UserDB]
    status: Result
    testset: Link[TestSetDB]
    variant: PydanticObjectId
    variant_revision: PydanticObjectId
    evaluators_configs: List[PydanticObjectId]
    aggregated_results: List[AggregatedResult]
    average_cost: Optional[Result] = None
    total_cost: Optional[Result] = None
    average_latency: Optional[Result] = None
    created_at: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc)
    )
    updated_at: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc)
    )

    class Settings:
        name = "new_evaluations"
279+
280+
281+
class CorrectAnswer(BaseModel):
    # Key/value pair describing one correct answer (both plain strings).
    key: str
    value: str
284+
285+
286+
class EvaluationScenarioDB(Document):
    """Evaluation scenario (new shape), stored in "new_evaluation_scenarios".

    Carries ``correct_answers`` as an optional list of ``CorrectAnswer``
    key/value pairs. Timestamps are computed per instance
    (``default_factory``) rather than once at import time.
    """

    user: Link[UserDB]
    evaluation: Link[EvaluationDB]
    variant_id: PydanticObjectId
    inputs: List[EvaluationScenarioInputDB]
    outputs: List[EvaluationScenarioOutputDB]
    correct_answers: Optional[List[CorrectAnswer]]
    is_pinned: Optional[bool]
    note: Optional[str]
    evaluators_configs: List[PydanticObjectId]
    results: List[EvaluationScenarioResult]
    # NOTE(review): latency/cost are ints here but floats on
    # EvaluationScenarioOutputDB -- confirm this is intended.
    latency: Optional[int] = None
    cost: Optional[int] = None
    created_at: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc)
    )
    updated_at: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc)
    )

    class Settings:
        name = "new_evaluation_scenarios"
304+
305+
306+
class OldEvaluationScenarioDB(Document):
    """Evaluation scenario (legacy shape), stored in "new_evaluation_scenarios".

    Carries a single optional ``correct_answer`` string. It deliberately names
    the same collection as the new-shape scenario model so both can read the
    same documents. Timestamps are computed per instance
    (``default_factory``) rather than once at import time.
    """

    user: Link[UserDB]
    evaluation: Link[EvaluationDB]
    variant_id: PydanticObjectId
    inputs: List[EvaluationScenarioInputDB]
    outputs: List[EvaluationScenarioOutputDB]
    correct_answer: Optional[str]
    is_pinned: Optional[bool]
    note: Optional[str]
    evaluators_configs: List[PydanticObjectId]
    results: List[EvaluationScenarioResult]
    latency: Optional[int] = None
    cost: Optional[int] = None
    created_at: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc)
    )
    updated_at: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc)
    )

    class Settings:
        name = "new_evaluation_scenarios"
324+
325+
326+
class Forward:
    # Forward migration: converts the legacy single `correct_answer` string
    # into the list-valued `correct_answers` field.
    @iterative_migration()
    async def migrate_correct_answers(
        self,
        input_document: OldEvaluationScenarioDB,
        output_document: EvaluationScenarioDB,
    ):
        # A truthy legacy answer becomes a one-element list keyed
        # "correct_answer"; a missing or empty one becomes an empty list.
        legacy_answer = input_document.correct_answer
        output_document.correct_answers = (
            [CorrectAnswer(key="correct_answer", value=legacy_answer)]
            if legacy_answer
            else []
        )

        # Drop the legacy attribute from the input instance's __dict__.
        # NOTE(review): presumably this keeps the old field from being carried
        # over when the migrated document is written back -- confirm against
        # Beanie's iterative migration behaviour.
        if "correct_answer" in input_document.dict():
            del input_document.__dict__["correct_answer"]
342+
343+
344+
class Backward:
    # Intentionally empty: no backward migration steps are defined.
    pass

agenta-backend/agenta_backend/models/api/evaluation_model.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -158,13 +158,18 @@ class HumanEvaluationScenarioUpdate(BaseModel):
158158
note: Optional[str]
159159

160160

161+
class CorrectAnswer(BaseModel):
    # Key/value pair describing one correct answer (both plain strings).
    key: str
    value: str
164+
165+
161166
class EvaluationScenario(BaseModel):
162167
id: Optional[str]
163168
evaluation_id: str
164169
inputs: List[EvaluationScenarioInput]
165170
outputs: List[EvaluationScenarioOutput]
166171
evaluation: Optional[str]
167-
correct_answer: Optional[str]
172+
correct_answers: Optional[List[CorrectAnswer]]
168173
is_pinned: Optional[bool]
169174
note: Optional[str]
170175
results: List[EvaluationScenarioResult]

agenta-backend/agenta_backend/models/converters.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from agenta_backend.utils.common import isCloudEE
1010
from agenta_backend.models.api.user_models import User
1111
from agenta_backend.models.api.evaluation_model import (
12+
CorrectAnswer,
1213
Evaluation,
1314
HumanEvaluation,
1415
EvaluatorConfig,
@@ -253,7 +254,10 @@ def evaluation_scenario_db_to_pydantic(
253254
EvaluationScenarioOutput(**scenario_output.dict())
254255
for scenario_output in evaluation_scenario_db.outputs
255256
],
256-
correct_answer=evaluation_scenario_db.correct_answer,
257+
correct_answers=[
258+
CorrectAnswer(**correct_answer.dict())
259+
for correct_answer in evaluation_scenario_db.correct_answers
260+
],
257261
is_pinned=evaluation_scenario_db.is_pinned or False,
258262
note=evaluation_scenario_db.note or "",
259263
results=evaluation_scenarios_results_to_pydantic(

0 commit comments

Comments
 (0)