Skip to content

Commit 11503e4

Browse files
AlexejPenner, avishniakov, actions-user, schustmi, bcdurak
authored
Simplify Metadata handling (#3096)
* Initial commit, nuking all metadata responses and seeing what breaks
* Removed last remnant of LazyLoader
* Reintroducing the lazy loaders.
* Add LazyRunMetadataResponse to EntrypointFunctionDefinition
* Test for lazy loaders works now
* Fixed tests, reformatted
* Use updated template
* Auto-update of Starter template
* Updated more templates
* Fixed failing test
* Fixed step run schemas
* Auto-update of E2E template
* Auto-update of NLP template
* Fixed tests, removed additional .value access
* Further fixing
* Fixed linting issues
* Reformatted
* Linted, formatted and tested again
* Typing
* Maybe fix everything
* Apply some feedback
---------
Co-authored-by: Andrei Vishniakov <[email protected]>
Co-authored-by: GitHub Actions <[email protected]>
Co-authored-by: Michael Schuster <[email protected]>
Co-authored-by: Baris Can Durak <[email protected]>
Co-authored-by: Barış Can Durak <[email protected]>
1 parent f82cd8a commit 11503e4

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

49 files changed

+188
-925
lines changed

.github/workflows/update-templates-to-examples.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ jobs:
4646
python-version: ${{ inputs.python-version }}
4747
stack-name: local
4848
ref-zenml: ${{ github.ref }}
49-
ref-template: 2024.10.10 # Make sure it is aligned with ZENML_PROJECT_TEMPLATES from src/zenml/cli/base.py
49+
ref-template: 2024.10.21 # Make sure it is aligned with ZENML_PROJECT_TEMPLATES from src/zenml/cli/base.py
5050
- name: Clean-up
5151
run: |
5252
rm -rf ./local_checkout
@@ -118,7 +118,7 @@ jobs:
118118
python-version: ${{ inputs.python-version }}
119119
stack-name: local
120120
ref-zenml: ${{ github.ref }}
121-
ref-template: 2024.09.23 # Make sure it is aligned with ZENML_PROJECT_TEMPLATES from src/zenml/cli/base.py
121+
ref-template: 2024.10.21 # Make sure it is aligned with ZENML_PROJECT_TEMPLATES from src/zenml/cli/base.py
122122
- name: Clean-up
123123
run: |
124124
rm -rf ./local_checkout
@@ -189,7 +189,7 @@ jobs:
189189
python-version: ${{ inputs.python-version }}
190190
stack-name: local
191191
ref-zenml: ${{ github.ref }}
192-
ref-template: 2024.09.24 # Make sure it is aligned with ZENML_PROJECT_TEMPLATES from src/zenml/cli/base.py
192+
ref-template: 2024.10.21 # Make sure it is aligned with ZENML_PROJECT_TEMPLATES from src/zenml/cli/base.py
193193
- name: Clean-up
194194
run: |
195195
rm -rf ./local_checkout

docs/book/component-guide/orchestrators/kubeflow.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -198,7 +198,7 @@ Kubeflow comes with its own UI that you can use to find further details about yo
198198
from zenml.client import Client
199199

200200
pipeline_run = Client().get_pipeline_run("<PIPELINE_RUN_NAME>")
201-
orchestrator_url = pipeline_run.run_metadata["orchestrator_url"].value
201+
orchestrator_url = pipeline_run.run_metadata["orchestrator_url"]
202202
```
203203

204204
#### Additional configuration

examples/e2e/.copier-answers.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# Changes here will be overwritten by Copier
2-
_commit: 2024.10.10
2+
_commit: 2024.10.21
33
_src_path: gh:zenml-io/template-e2e-batch
44
data_quality_checks: true
55

examples/e2e/steps/deployment/deployment_deploy.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ def deployment_deploy() -> (
6767
registry_model_name=model.name,
6868
registry_model_version=model.run_metadata[
6969
"model_registry_version"
70-
].value,
70+
],
7171
replace_existing=True,
7272
)
7373
else:

examples/e2e/steps/hp_tuning/hp_tuning_select_best_model.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ def hp_tuning_select_best_model(
5050
hp_output = model.get_data_artifact("hp_result")
5151
model_: ClassifierMixin = hp_output.load()
5252
# fetch metadata we attached earlier
53-
metric = float(hp_output.run_metadata["metric"].value)
53+
metric = float(hp_output.run_metadata["metric"])
5454
if best_model is None or best_metric < metric:
5555
best_model = model_
5656
### YOUR CODE ENDS HERE ###

examples/e2e/steps/promotion/promote_with_metric_compare.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -92,14 +92,14 @@ def promote_with_metric_compare(
9292
# Promote in Model Registry
9393
latest_version_model_registry_number = latest_version.run_metadata[
9494
"model_registry_version"
95-
].value
95+
]
9696
if current_version_number is None:
9797
current_version_model_registry_number = (
9898
latest_version_model_registry_number
9999
)
100100
else:
101101
current_version_model_registry_number = (
102-
current_version.run_metadata["model_registry_version"].value
102+
current_version.run_metadata["model_registry_version"]
103103
)
104104
promote_in_model_registry(
105105
latest_version=latest_version_model_registry_number,
@@ -111,7 +111,7 @@ def promote_with_metric_compare(
111111
else:
112112
promoted_version = current_version.run_metadata[
113113
"model_registry_version"
114-
].value
114+
]
115115

116116
logger.info(
117117
f"Current model version in `{target_env}` is `{promoted_version}` registered in Model Registry"

examples/e2e_nlp/.copier-answers.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# Changes here will be overwritten by Copier
2-
_commit: 2024.09.23
2+
_commit: 2024.10.21
33
_src_path: gh:zenml-io/template-nlp
44
accelerator: cpu
55
cloud_of_choice: aws

examples/e2e_nlp/gradio/requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,4 +9,4 @@ pandas==1.5.3
99
session_info==1.0.0
1010
scikit-learn==1.5.0
1111
transformers==4.28.1
12-
IPython==7.34.0
12+
IPython==8.10.0

examples/e2e_nlp/steps/promotion/promote_get_metrics.py

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -56,9 +56,7 @@ def promote_get_metrics() -> (
5656

5757
# Get current model version metric in current run
5858
model = get_step_context().model
59-
current_metrics = (
60-
model.get_model_artifact("model").run_metadata["metrics"].value
61-
)
59+
current_metrics = model.get_model_artifact("model").run_metadata["metrics"]
6260
logger.info(f"Current model version metrics are {current_metrics}")
6361

6462
# Get latest saved model version metric in target environment
@@ -72,11 +70,9 @@ def promote_get_metrics() -> (
7270
except KeyError:
7371
latest_version = None
7472
if latest_version:
75-
latest_metrics = (
76-
latest_version.get_model_artifact("model")
77-
.run_metadata["metrics"]
78-
.value
79-
)
73+
latest_metrics = latest_version.get_model_artifact(
74+
"model"
75+
).run_metadata["metrics"]
8076
logger.info(f"Latest model version metrics are {latest_metrics}")
8177
else:
8278
logger.info("No currently promoted model version found.")

examples/mlops_starter/.copier-answers.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# Changes here will be overwritten by Copier
2-
_commit: 2024.09.24
2+
_commit: 2024.10.21
33
_src_path: gh:zenml-io/template-starter
44
55
full_name: ZenML GmbH

examples/mlops_starter/quickstart.ipynb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -994,8 +994,8 @@
994994
"@pipeline\n",
995995
"def inference(preprocess_pipeline_id: UUID):\n",
996996
" \"\"\"Model batch inference pipeline\"\"\"\n",
997-
" # random_state = client.get_artifact_version(name_id_or_prefix=preprocess_pipeline_id).metadata[\"random_state\"].value\n",
998-
" # target = client.get_artifact_version(name_id_or_prefix=preprocess_pipeline_id).run_metadata['target'].value\n",
997+
" # random_state = client.get_artifact_version(name_id_or_prefix=preprocess_pipeline_id).metadata[\"random_state\"]\n",
998+
" # target = client.get_artifact_version(name_id_or_prefix=preprocess_pipeline_id).run_metadata['target']\n",
999999
" random_state = 42\n",
10001000
" target = \"target\"\n",
10011001
"\n",

examples/mlops_starter/run.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -239,8 +239,8 @@ def main(
239239
# to get the random state and target column
240240
random_state = preprocess_pipeline_artifact.run_metadata[
241241
"random_state"
242-
].value
243-
target = preprocess_pipeline_artifact.run_metadata["target"].value
242+
]
243+
target = preprocess_pipeline_artifact.run_metadata["target"]
244244
run_args_inference["random_state"] = random_state
245245
run_args_inference["target"] = target
246246

examples/mlops_starter/steps/model_promoter.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -58,11 +58,9 @@ def model_promoter(accuracy: float, stage: str = "production") -> bool:
5858
try:
5959
stage_model = client.get_model_version(current_model.name, stage)
6060
# We compare their metrics
61-
prod_accuracy = (
62-
stage_model.get_artifact("sklearn_classifier")
63-
.run_metadata["test_accuracy"]
64-
.value
65-
)
61+
prod_accuracy = stage_model.get_artifact(
62+
"sklearn_classifier"
63+
).run_metadata["test_accuracy"]
6664
if float(accuracy) > float(prod_accuracy):
6765
# If current model has better metrics, we promote it
6866
is_promoted = True

src/zenml/cli/base.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -79,15 +79,15 @@ def copier_github_url(self) -> str:
7979
ZENML_PROJECT_TEMPLATES = dict(
8080
e2e_batch=ZenMLProjectTemplateLocation(
8181
github_url="zenml-io/template-e2e-batch",
82-
github_tag="2024.10.10", # Make sure it is aligned with .github/workflows/update-templates-to-examples.yml
82+
github_tag="2024.10.21", # Make sure it is aligned with .github/workflows/update-templates-to-examples.yml
8383
),
8484
starter=ZenMLProjectTemplateLocation(
8585
github_url="zenml-io/template-starter",
86-
github_tag="2024.09.24", # Make sure it is aligned with .github/workflows/update-templates-to-examples.yml
86+
github_tag="2024.10.21", # Make sure it is aligned with .github/workflows/update-templates-to-examples.yml
8787
),
8888
nlp=ZenMLProjectTemplateLocation(
8989
github_url="zenml-io/template-nlp",
90-
github_tag="2024.09.23", # Make sure it is aligned with .github/workflows/update-templates-to-examples.yml
90+
github_tag="2024.10.21", # Make sure it is aligned with .github/workflows/update-templates-to-examples.yml
9191
),
9292
llm_finetuning=ZenMLProjectTemplateLocation(
9393
github_url="zenml-io/template-llm-finetuning",

src/zenml/cli/model.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -59,19 +59,14 @@ def _model_to_print(model: ModelResponse) -> Dict[str, Any]:
5959
def _model_version_to_print(
6060
model_version: ModelVersionResponse,
6161
) -> Dict[str, Any]:
62-
run_metadata = None
63-
if model_version.run_metadata:
64-
run_metadata = {
65-
k: v.value for k, v in model_version.run_metadata.items()
66-
}
6762
return {
6863
"id": model_version.id,
6964
"model": model_version.model.name,
7065
"name": model_version.name,
7166
"number": model_version.number,
7267
"description": model_version.description,
7368
"stage": model_version.stage,
74-
"run_metadata": run_metadata,
69+
"run_metadata": model_version.run_metadata,
7570
"tags": [t.name for t in model_version.tags],
7671
"data_artifacts_count": len(model_version.data_artifact_ids),
7772
"model_artifacts_count": len(model_version.model_artifact_ids),

src/zenml/client.py

Lines changed: 4 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -136,9 +136,7 @@
136136
PipelineResponse,
137137
PipelineRunFilter,
138138
PipelineRunResponse,
139-
RunMetadataFilter,
140139
RunMetadataRequest,
141-
RunMetadataResponse,
142140
RunTemplateFilter,
143141
RunTemplateRequest,
144142
RunTemplateResponse,
@@ -4417,7 +4415,7 @@ def create_run_metadata(
44174415
resource_id: UUID,
44184416
resource_type: MetadataResourceTypes,
44194417
stack_component_id: Optional[UUID] = None,
4420-
) -> List[RunMetadataResponse]:
4418+
) -> None:
44214419
"""Create run metadata.
44224420
44234421
Args:
@@ -4430,7 +4428,7 @@ def create_run_metadata(
44304428
the metadata.
44314429
44324430
Returns:
4433-
The created metadata, as string to model dictionary.
4431+
None
44344432
"""
44354433
from zenml.metadata.metadata_types import get_metadata_type
44364434

@@ -4465,74 +4463,8 @@ def create_run_metadata(
44654463
values=values,
44664464
types=types,
44674465
)
4468-
return self.zen_store.create_run_metadata(run_metadata)
4469-
4470-
def list_run_metadata(
4471-
self,
4472-
sort_by: str = "created",
4473-
page: int = PAGINATION_STARTING_PAGE,
4474-
size: int = PAGE_SIZE_DEFAULT,
4475-
logical_operator: LogicalOperators = LogicalOperators.AND,
4476-
id: Optional[Union[UUID, str]] = None,
4477-
created: Optional[Union[datetime, str]] = None,
4478-
updated: Optional[Union[datetime, str]] = None,
4479-
workspace_id: Optional[UUID] = None,
4480-
user_id: Optional[UUID] = None,
4481-
resource_id: Optional[UUID] = None,
4482-
resource_type: Optional[MetadataResourceTypes] = None,
4483-
stack_component_id: Optional[UUID] = None,
4484-
key: Optional[str] = None,
4485-
value: Optional["MetadataType"] = None,
4486-
type: Optional[str] = None,
4487-
hydrate: bool = False,
4488-
) -> Page[RunMetadataResponse]:
4489-
"""List run metadata.
4490-
4491-
Args:
4492-
sort_by: The field to sort the results by.
4493-
page: The page number to return.
4494-
size: The number of results to return per page.
4495-
logical_operator: The logical operator to use for filtering.
4496-
id: The ID of the metadata.
4497-
created: The creation time of the metadata.
4498-
updated: The last update time of the metadata.
4499-
workspace_id: The ID of the workspace the metadata belongs to.
4500-
user_id: The ID of the user that created the metadata.
4501-
resource_id: The ID of the resource the metadata belongs to.
4502-
resource_type: The type of the resource the metadata belongs to.
4503-
stack_component_id: The ID of the stack component that produced
4504-
the metadata.
4505-
key: The key of the metadata.
4506-
value: The value of the metadata.
4507-
type: The type of the metadata.
4508-
hydrate: Flag deciding whether to hydrate the output model(s)
4509-
by including metadata fields in the response.
4510-
4511-
Returns:
4512-
The run metadata.
4513-
"""
4514-
metadata_filter_model = RunMetadataFilter(
4515-
sort_by=sort_by,
4516-
page=page,
4517-
size=size,
4518-
logical_operator=logical_operator,
4519-
id=id,
4520-
created=created,
4521-
updated=updated,
4522-
workspace_id=workspace_id,
4523-
user_id=user_id,
4524-
resource_id=resource_id,
4525-
resource_type=resource_type,
4526-
stack_component_id=stack_component_id,
4527-
key=key,
4528-
value=value,
4529-
type=type,
4530-
)
4531-
metadata_filter_model.set_scope_workspace(self.active_workspace.id)
4532-
return self.zen_store.list_run_metadata(
4533-
metadata_filter_model,
4534-
hydrate=hydrate,
4535-
)
4466+
self.zen_store.create_run_metadata(run_metadata)
4467+
return None
45364468

45374469
# -------------------------------- Secrets ---------------------------------
45384470

src/zenml/integrations/aws/orchestrators/sagemaker_orchestrator.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -566,7 +566,7 @@ def fetch_status(self, run: "PipelineRunResponse") -> ExecutionStatus:
566566

567567
# Fetch the status of the _PipelineExecution
568568
if METADATA_ORCHESTRATOR_RUN_ID in run.run_metadata:
569-
run_id = run.run_metadata[METADATA_ORCHESTRATOR_RUN_ID].value
569+
run_id = run.run_metadata[METADATA_ORCHESTRATOR_RUN_ID]
570570
elif run.orchestrator_run_id is not None:
571571
run_id = run.orchestrator_run_id
572572
else:

src/zenml/integrations/azure/orchestrators/azureml_orchestrator.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -482,7 +482,7 @@ def fetch_status(self, run: "PipelineRunResponse") -> ExecutionStatus:
482482

483483
# Fetch the status of the PipelineJob
484484
if METADATA_ORCHESTRATOR_RUN_ID in run.run_metadata:
485-
run_id = run.run_metadata[METADATA_ORCHESTRATOR_RUN_ID].value
485+
run_id = run.run_metadata[METADATA_ORCHESTRATOR_RUN_ID]
486486
elif run.orchestrator_run_id is not None:
487487
run_id = run.orchestrator_run_id
488488
else:

src/zenml/integrations/gcp/orchestrators/vertex_orchestrator.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -835,7 +835,7 @@ def fetch_status(self, run: "PipelineRunResponse") -> ExecutionStatus:
835835

836836
# Fetch the status of the PipelineJob
837837
if METADATA_ORCHESTRATOR_RUN_ID in run.run_metadata:
838-
run_id = run.run_metadata[METADATA_ORCHESTRATOR_RUN_ID].value
838+
run_id = run.run_metadata[METADATA_ORCHESTRATOR_RUN_ID]
839839
elif run.orchestrator_run_id is not None:
840840
run_id = run.orchestrator_run_id
841841
else:

src/zenml/metadata/lazy_load.py

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,25 @@
1313
# permissions and limitations under the License.
1414
"""Run Metadata Lazy Loader definition."""
1515

16-
from typing import TYPE_CHECKING, Optional
16+
from typing import Optional
1717

18-
if TYPE_CHECKING:
19-
from zenml.models import RunMetadataResponse
18+
from pydantic import BaseModel
19+
20+
from zenml.metadata.metadata_types import MetadataType
21+
22+
23+
class LazyRunMetadataResponse(BaseModel):
24+
"""Lazy run metadata response.
25+
26+
Used if the run metadata is accessed from the model in
27+
a pipeline context available only during pipeline compilation.
28+
"""
29+
30+
lazy_load_artifact_name: Optional[str] = None
31+
lazy_load_artifact_version: Optional[str] = None
32+
lazy_load_metadata_name: Optional[str] = None
33+
lazy_load_model_name: str
34+
lazy_load_model_version: Optional[str] = None
2035

2136

2237
class RunMetadataLazyGetter:
@@ -47,7 +62,7 @@ def __init__(
4762
self._lazy_load_artifact_name = _lazy_load_artifact_name
4863
self._lazy_load_artifact_version = _lazy_load_artifact_version
4964

50-
def __getitem__(self, key: str) -> "RunMetadataResponse":
65+
def __getitem__(self, key: str) -> MetadataType:
5166
"""Get the metadata for the given key.
5267
5368
Args:
@@ -56,9 +71,7 @@ def __getitem__(self, key: str) -> "RunMetadataResponse":
5671
Returns:
5772
The metadata lazy loader wrapper for the given key.
5873
"""
59-
from zenml.models.v2.core.run_metadata import LazyRunMetadataResponse
60-
61-
return LazyRunMetadataResponse(
74+
return LazyRunMetadataResponse( # type: ignore[return-value]
6275
lazy_load_model_name=self._lazy_load_model_name,
6376
lazy_load_model_version=self._lazy_load_model_version,
6477
lazy_load_artifact_name=self._lazy_load_artifact_name,

0 commit comments

Comments
 (0)