@@ -74,23 +74,37 @@ def main() -> None:
74
74
orchestrator_pod_name = socket .gethostname ()
75
75
76
76
client = Client ()
77
+ active_stack = client .active_stack
78
+ orchestrator = active_stack .orchestrator
79
+ assert isinstance (orchestrator , KubernetesOrchestrator )
77
80
78
- deployment_config = client .get_deployment (args .deployment_id )
81
+ deployment = client .get_deployment (args .deployment_id )
82
+ pipeline_settings = cast (
83
+ KubernetesOrchestratorSettings ,
84
+ orchestrator .get_settings (deployment ),
85
+ )
79
86
80
- pipeline_dag = {
81
- step_name : step .spec .upstream_steps
82
- for step_name , step in deployment_config .step_configurations .items ()
83
- }
84
87
step_command = StepEntrypointConfiguration .get_entrypoint_command ()
85
88
86
- active_stack = client .active_stack
89
+ if args .run_id and not pipeline_settings .prevent_orchestrator_pod_caching :
90
+ from zenml .orchestrators import cache_utils
91
+
92
+ run_required = (
93
+ cache_utils .create_cached_step_runs_and_prune_deployment (
94
+ deployment = deployment ,
95
+ pipeline_run = client .get_pipeline_run (args .run_id ),
96
+ stack = active_stack ,
97
+ )
98
+ )
99
+
100
+ if not run_required :
101
+ return
102
+
87
103
mount_local_stores = active_stack .orchestrator .config .is_local
88
104
89
105
# Get a Kubernetes client from the active Kubernetes orchestrator, but
90
106
# override the `incluster` setting to `True` since we are running inside
91
107
# the Kubernetes cluster.
92
- orchestrator = active_stack .orchestrator
93
- assert isinstance (orchestrator , KubernetesOrchestrator )
94
108
kube_client = orchestrator .get_kube_client (incluster = True )
95
109
core_api = k8s_client .CoreV1Api (kube_client )
96
110
@@ -121,7 +135,7 @@ def run_step_on_kubernetes(step_name: str) -> None:
121
135
Raises:
122
136
Exception: If the pod fails to start.
123
137
"""
124
- step_config = deployment_config .step_configurations [step_name ].config
138
+ step_config = deployment .step_configurations [step_name ].config
125
139
settings = step_config .settings .get ("orchestrator.kubernetes" , None )
126
140
settings = KubernetesOrchestratorSettings .model_validate (
127
141
settings .model_dump () if settings else {}
@@ -147,10 +161,10 @@ def run_step_on_kubernetes(step_name: str) -> None:
147
161
)
148
162
149
163
image = KubernetesOrchestrator .get_image (
150
- deployment = deployment_config , step_name = step_name
164
+ deployment = deployment , step_name = step_name
151
165
)
152
166
step_args = StepEntrypointConfiguration .get_entrypoint_arguments (
153
- step_name = step_name , deployment_id = deployment_config .id
167
+ step_name = step_name , deployment_id = deployment .id
154
168
)
155
169
156
170
# We set some default minimum memory resource requests for the step pod
@@ -165,9 +179,7 @@ def run_step_on_kubernetes(step_name: str) -> None:
165
179
166
180
if orchestrator .config .pass_zenml_token_as_secret :
167
181
env .pop ("ZENML_STORE_API_TOKEN" , None )
168
- secret_name = orchestrator .get_token_secret_name (
169
- deployment_config .id
170
- )
182
+ secret_name = orchestrator .get_token_secret_name (deployment .id )
171
183
pod_settings .env .append (
172
184
{
173
185
"name" : "ZENML_STORE_API_TOKEN" ,
@@ -184,7 +196,7 @@ def run_step_on_kubernetes(step_name: str) -> None:
184
196
pod_manifest = build_pod_manifest (
185
197
pod_name = pod_name ,
186
198
run_name = args .run_name ,
187
- pipeline_name = deployment_config .pipeline_configuration .name ,
199
+ pipeline_name = deployment .pipeline_configuration .name ,
188
200
image_name = image ,
189
201
command = step_command ,
190
202
args = step_args ,
@@ -251,8 +263,8 @@ def finalize_run(node_states: Dict[str, NodeStatus]) -> None:
251
263
252
264
pipeline_runs = client .list_pipeline_runs (
253
265
hydrate = True ,
254
- project = deployment_config .project_id ,
255
- deployment_id = deployment_config .id ,
266
+ project = deployment .project_id ,
267
+ deployment_id = deployment .id ,
256
268
** list_args ,
257
269
)
258
270
if not len (pipeline_runs ):
@@ -298,27 +310,26 @@ def finalize_run(node_states: Dict[str, NodeStatus]) -> None:
298
310
parallel_node_startup_waiting_period = (
299
311
orchestrator .config .parallel_step_startup_waiting_period or 0.0
300
312
)
301
- settings = cast (
302
- KubernetesOrchestratorSettings ,
303
- orchestrator .get_settings (deployment_config ),
304
- )
313
+
314
+ pipeline_dag = {
315
+ step_name : step .spec .upstream_steps
316
+ for step_name , step in deployment .step_configurations .items ()
317
+ }
305
318
try :
306
319
ThreadedDagRunner (
307
320
dag = pipeline_dag ,
308
321
run_fn = run_step_on_kubernetes ,
309
322
finalize_fn = finalize_run ,
310
323
parallel_node_startup_waiting_period = parallel_node_startup_waiting_period ,
311
- max_parallelism = settings .max_parallelism ,
324
+ max_parallelism = pipeline_settings .max_parallelism ,
312
325
).run ()
313
326
logger .info ("Orchestration pod completed." )
314
327
finally :
315
328
if (
316
329
orchestrator .config .pass_zenml_token_as_secret
317
- and deployment_config .schedule is None
330
+ and deployment .schedule is None
318
331
):
319
- secret_name = orchestrator .get_token_secret_name (
320
- deployment_config .id
321
- )
332
+ secret_name = orchestrator .get_token_secret_name (deployment .id )
322
333
try :
323
334
kube_utils .delete_secret (
324
335
core_api = core_api ,
0 commit comments