Skip to content

Commit 52b29a4

Browse files
google-genai-bot authored and copybara-github committed
feat: Implement GcsEvalSetResultsManager to handle storage of eval sets on GCS, and refactor eval set results manager
Eval results will be stored as JSON files under `gs://{bucket_name}/{app_name}/evals/eval_history/`.

PiperOrigin-RevId: 766808381
1 parent 6c999ca commit 52b29a4

12 files changed

+1191
-116
lines changed

src/google/adk/evaluation/eval_result.py

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,9 @@ class EvalCaseResult(BaseModel):
3636
populate_by_name=True,
3737
)
3838

39-
eval_set_file: str = Field(
39+
eval_set_file: Optional[str] = Field(
4040
deprecated=True,
41+
default=None,
4142
description="This field is deprecated, use eval_set_id instead.",
4243
)
4344
eval_set_id: str = ""
@@ -49,11 +50,15 @@ class EvalCaseResult(BaseModel):
4950
final_eval_status: EvalStatus
5051
"""Final eval status for this eval case."""
5152

52-
eval_metric_results: list[tuple[EvalMetric, EvalMetricResult]] = Field(
53-
deprecated=True,
54-
description=(
55-
"This field is deprecated, use overall_eval_metric_results instead."
56-
),
53+
eval_metric_results: Optional[list[tuple[EvalMetric, EvalMetricResult]]] = (
54+
Field(
55+
deprecated=True,
56+
default=None,
57+
description=(
58+
"This field is deprecated, use overall_eval_metric_results"
59+
" instead."
60+
),
61+
)
5762
)
5863

5964
overall_eval_metric_results: list[EvalMetricResult]
@@ -80,7 +85,7 @@ class EvalSetResult(BaseModel):
8085
populate_by_name=True,
8186
)
8287
eval_set_result_id: str
83-
eval_set_result_name: str
88+
eval_set_result_name: Optional[str] = None
8489
eval_set_id: str
8590
eval_case_results: list[EvalCaseResult] = Field(default_factory=list)
8691
creation_timestamp: float = 0.0

src/google/adk/evaluation/eval_set_results_manager.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,17 @@
1616

1717
from abc import ABC
1818
from abc import abstractmethod
19+
import time
1920

2021
from .eval_result import EvalCaseResult
2122
from .eval_result import EvalSetResult
2223

2324

25+
def _sanitize_eval_set_result_name(eval_set_result_name: str) -> str:
26+
"""Sanitizes the eval set result name."""
27+
return eval_set_result_name.replace("/", "_")
28+
29+
2430
class EvalSetResultsManager(ABC):
2531
"""An interface to manage Eval Set Results."""
2632

@@ -45,3 +51,22 @@ def get_eval_set_result(
4551
def list_eval_set_results(self, app_name: str) -> list[str]:
4652
"""Returns the eval result ids that belong to the given app_name."""
4753
raise NotImplementedError()
54+
55+
def create_eval_set_result(
    self,
    app_name: str,
    eval_set_id: str,
    eval_case_results: list[EvalCaseResult],
) -> EvalSetResult:
  """Creates a new EvalSetResult given eval_case_results.

  The result id is `{app_name}_{eval_set_id}_{timestamp}` where `timestamp`
  is the current `time.time()` value. The result name is that id passed
  through `_sanitize_eval_set_result_name`. The same timestamp is stored as
  the creation timestamp.

  Args:
    app_name: The app the eval set belongs to.
    eval_set_id: The id of the eval set that was evaluated.
    eval_case_results: The per-case results to attach to the new result.

  Returns:
    The newly built EvalSetResult. Nothing is persisted by this method.
  """
  # A single clock read keeps the id and creation_timestamp in agreement.
  timestamp = time.time()
  eval_set_result_id = f"{app_name}_{eval_set_id}_{timestamp}"
  eval_set_result_name = _sanitize_eval_set_result_name(eval_set_result_id)
  eval_set_result = EvalSetResult(
      eval_set_result_id=eval_set_result_id,
      eval_set_result_name=eval_set_result_name,
      eval_set_id=eval_set_id,
      eval_case_results=eval_case_results,
      creation_timestamp=timestamp,
  )
  return eval_set_result

src/google/adk/evaluation/eval_sets_manager.py

Lines changed: 113 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,15 @@
1616

1717
from abc import ABC
1818
from abc import abstractmethod
19+
import logging
1920
from typing import Optional
2021

2122
from ..errors.not_found_error import NotFoundError
2223
from .eval_case import EvalCase
2324
from .eval_set import EvalSet
2425

26+
logger = logging.getLogger("google_adk." + __name__)
27+
2528

2629
class EvalSetsManager(ABC):
2730
"""An interface to manage an Eval Sets."""
@@ -38,12 +41,6 @@ def create_eval_set(self, app_name: str, eval_set_id: str):
3841
def list_eval_sets(self, app_name: str) -> list[str]:
3942
"""Returns a list of EvalSets that belong to the given app_name."""
4043

41-
@abstractmethod
42-
def get_eval_case(
43-
self, app_name: str, eval_set_id: str, eval_case_id: str
44-
) -> Optional[EvalCase]:
45-
"""Returns an EvalCase if found, otherwise None."""
46-
4744
@abstractmethod
4845
def add_eval_case(self, app_name: str, eval_set_id: str, eval_case: EvalCase):
4946
"""Adds the given EvalCase to an existing EvalSet identified by app_name and eval_set_id.
@@ -71,3 +68,113 @@ def delete_eval_case(
7168
Raises:
7269
NotFoundError: If the eval set or the eval case to delete is not found.
7370
"""
71+
72+
def get_eval_case(
    self, app_name: str, eval_set_id: str, eval_case_id: str
) -> Optional[EvalCase]:
  """Returns an EvalCase if found, otherwise None.

  Args:
    app_name: The app the eval set belongs to.
    eval_set_id: The id of the eval set to search.
    eval_case_id: The `eval_id` of the eval case to look up.

  Returns:
    The first eval case in the eval set whose `eval_id` equals
    `eval_case_id`, or None if the eval set is missing (falsy) or holds no
    such case.
  """
  eval_set = self.get_eval_set(app_name, eval_set_id)
  if not eval_set:
    return None

  # Look up the eval case by eval_case_id; the first match wins.
  return next(
      (
          eval_case
          for eval_case in eval_set.eval_cases
          if eval_case.eval_id == eval_case_id
      ),
      None,
  )
90+
91+
def _add_eval_case_to_eval_set(
92+
self, app_name: str, eval_set_id: str, eval_case: EvalCase
93+
) -> EvalSet:
94+
"""Adds an eval case to an eval set and returns the updated eval set.
95+
96+
Returns:
97+
The updated eval set with the added eval case.
98+
99+
Raises:
100+
NotFoundError: If the eval set is not found.
101+
ValueError: If the eval case already exists in the eval set.
102+
"""
103+
eval_set = self.get_eval_set(app_name, eval_set_id)
104+
if not eval_set:
105+
raise NotFoundError(f"Eval set `{eval_set_id}` not found.")
106+
eval_case_id = eval_case.eval_id
107+
108+
if [x for x in eval_set.eval_cases if x.eval_id == eval_case_id]:
109+
raise ValueError(
110+
f"Eval id `{eval_case_id}` already exists in `{eval_set_id}`"
111+
" eval set.",
112+
)
113+
114+
eval_set.eval_cases.append(eval_case)
115+
return eval_set
116+
117+
def _update_eval_case_in_eval_set(
118+
self, app_name: str, eval_set_id: str, updated_eval_case: EvalCase
119+
) -> EvalSet:
120+
"""Updates an eval case in an eval set and returns the updated eval set.
121+
122+
Returns:
123+
The updated eval set with the updated eval case.
124+
125+
Raises:
126+
NotFoundError: If the eval set or the eval case to delete is not found.
127+
"""
128+
eval_set = self.get_eval_set(app_name, eval_set_id)
129+
if not eval_set:
130+
raise NotFoundError(f"Eval set `{eval_set_id}` not found.")
131+
132+
# Find the eval case to be updated.
133+
eval_case_id = updated_eval_case.eval_id
134+
eval_case_to_update = self.get_eval_case(
135+
app_name, eval_set_id, eval_case_id
136+
)
137+
138+
if not eval_case_to_update:
139+
raise NotFoundError(
140+
f"Eval case `{eval_case_id}` not found in eval set `{eval_set_id}`."
141+
)
142+
143+
# Remove the existing eval case and add the updated eval case.
144+
eval_set.eval_cases.remove(eval_case_to_update)
145+
eval_set.eval_cases.append(updated_eval_case)
146+
return eval_set
147+
148+
def _delete_eval_case_from_eval_set(
    self, app_name: str, eval_set_id: str, eval_case_id: str
) -> EvalSet:
  """Deletes an eval case from an eval set and returns the updated eval set.

  The eval set is only modified in memory; this method does not write it
  anywhere.

  Args:
    app_name: The app the eval set belongs to.
    eval_set_id: The id of the eval set holding the case.
    eval_case_id: The `eval_id` of the eval case to remove.

  Returns:
    The updated eval set with eval case removed.

  Raises:
    NotFoundError: If the eval set or the eval case to delete is not found.
  """
  eval_set = self.get_eval_set(app_name, eval_set_id)
  if not eval_set:
    raise NotFoundError(f"Eval set `{eval_set_id}` not found.")

  # Find the eval case to be deleted.
  eval_case_to_delete = self.get_eval_case(
      app_name, eval_set_id, eval_case_id
  )

  if not eval_case_to_delete:
    raise NotFoundError(
        f"Eval case `{eval_case_id}` not found in eval set `{eval_set_id}`."
    )

  # Remove the existing eval case.
  logger.info(
      "EvalCase`%s` was found in the eval set. It will be removed "
      "permanently.",
      eval_case_id,
  )
  eval_set.eval_cases.remove(eval_case_to_delete)
  return eval_set
Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from __future__ import annotations
16+
17+
import logging
18+
19+
from google.cloud import exceptions as cloud_exceptions
20+
from google.cloud import storage
21+
from typing_extensions import override
22+
23+
from .eval_result import EvalCaseResult
24+
from .eval_result import EvalSetResult
25+
from .eval_set_results_manager import EvalSetResultsManager
26+
27+
logger = logging.getLogger("google_adk." + __name__)
28+
29+
_EVAL_HISTORY_DIR = "evals/eval_history"
30+
_EVAL_SET_RESULT_FILE_EXTENSION = ".evalset_result.json"
31+
32+
33+
class GcsEvalSetResultsManager(EvalSetResultsManager):
  """An EvalSetResultsManager that stores eval results in a GCS bucket.

  Each result is stored as a JSON blob named
  `{app_name}/evals/eval_history/{eval_set_result_id}.evalset_result.json`.
  """

  def __init__(self, bucket_name: str, **kwargs):
    """Initializes the GcsEvalSetResultsManager.

    Args:
      bucket_name: The name of the bucket to use.
      **kwargs: Keyword arguments to pass to the Google Cloud Storage client.

    Raises:
      ValueError: If the bucket does not exist.
    """
    self.bucket_name = bucket_name
    self.storage_client = storage.Client(**kwargs)
    self.bucket = self.storage_client.bucket(self.bucket_name)
    # Check if the bucket exists.
    if not self.bucket.exists():
      raise ValueError(
          f"Bucket `{self.bucket_name}` does not exist. Please create it before"
          " using the GcsEvalSetResultsManager."
      )

  def _get_eval_history_dir(self, app_name: str) -> str:
    """Returns the blob-name prefix (no trailing slash) for an app's results."""
    return f"{app_name}/{_EVAL_HISTORY_DIR}"

  def _get_eval_set_result_blob_name(
      self, app_name: str, eval_set_result_id: str
  ) -> str:
    """Returns the full blob name for the given eval set result id."""
    eval_history_dir = self._get_eval_history_dir(app_name)
    return (
        f"{eval_history_dir}/{eval_set_result_id}"
        f"{_EVAL_SET_RESULT_FILE_EXTENSION}"
    )

  def _write_eval_set_result(
      self, blob_name: str, eval_set_result: EvalSetResult
  ):
    """Writes an EvalSetResult to GCS."""
    blob = self.bucket.blob(blob_name)
    blob.upload_from_string(
        eval_set_result.model_dump_json(indent=2),
        content_type="application/json",
    )

  @override
  def save_eval_set_result(
      self,
      app_name: str,
      eval_set_id: str,
      eval_case_results: list[EvalCaseResult],
  ) -> None:
    """Creates and saves a new EvalSetResult given eval_case_results."""
    eval_set_result = self.create_eval_set_result(
        app_name, eval_set_id, eval_case_results
    )

    eval_set_result_blob_name = self._get_eval_set_result_blob_name(
        app_name, eval_set_result.eval_set_result_id
    )
    logger.info("Writing eval result to blob: %s", eval_set_result_blob_name)
    self._write_eval_set_result(eval_set_result_blob_name, eval_set_result)

  @override
  def get_eval_set_result(
      self, app_name: str, eval_set_result_id: str
  ) -> EvalSetResult | None:
    """Returns an EvalSetResult from app_name and eval_set_result_id.

    Returns:
      The parsed EvalSetResult, or None if no blob exists for the given id.
    """
    eval_set_result_blob_name = self._get_eval_set_result_blob_name(
        app_name, eval_set_result_id
    )
    blob = self.bucket.blob(eval_set_result_blob_name)
    # Only the download is guarded, so a missing blob maps to None while
    # validation errors on the payload still propagate.
    try:
      eval_set_result_data = blob.download_as_text()
    except cloud_exceptions.NotFound:
      return None
    return EvalSetResult.model_validate_json(eval_set_result_data)

  @override
  def list_eval_set_results(self, app_name: str) -> list[str]:
    """Returns the eval result ids that belong to the given app_name.

    Returns:
      The sorted ids of every eval set result blob under the app's eval
      history directory.

    Raises:
      ValueError: If GCS reports NotFound while listing.
    """
    # The trailing slash stops the prefix from matching sibling paths such
    # as `evals/eval_history_backup/`.
    prefix = f"{self._get_eval_history_dir(app_name)}/"
    eval_set_result_ids = []
    try:
      for blob in self.bucket.list_blobs(prefix=prefix):
        file_name = blob.name.split("/")[-1]
        # Skip blobs this manager did not write, e.g. directory placeholders.
        if not file_name.endswith(_EVAL_SET_RESULT_FILE_EXTENSION):
          continue
        eval_set_result_ids.append(
            file_name.removesuffix(_EVAL_SET_RESULT_FILE_EXTENSION)
        )
    except cloud_exceptions.NotFound as e:
      raise ValueError(
          f"App `{app_name}` not found in GCS bucket `{self.bucket_name}`."
      ) from e
    return sorted(eval_set_result_ids)

0 commit comments

Comments
 (0)