Skip to content

Commit 96d034f

Browse files
committed
[Bench] add SubmitKernel benchmark variant with longer kernel
To test out-of-order (OOO) queues
1 parent b8cc70c commit 96d034f

File tree

1 file changed

+32
-12
lines changed

1 file changed

+32
-12
lines changed

devops/scripts/benchmarks/benches/compute.py

Lines changed: 32 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -144,15 +144,17 @@ def benchmarks(self) -> list[Benchmark]:
144144
for in_order_queue in [0, 1]:
145145
for measure_completion in [0, 1]:
146146
for use_events in [0, 1]:
147-
benches.append(
148-
SubmitKernel(
149-
self,
150-
runtime,
151-
in_order_queue,
152-
measure_completion,
153-
use_events,
147+
for kernel_exec_time in [1, 20]:
148+
benches.append(
149+
SubmitKernel(
150+
self,
151+
runtime,
152+
in_order_queue,
153+
measure_completion,
154+
use_events,
155+
kernel_exec_time,
156+
)
154157
)
155-
)
156158

157159
# Add SinKernelGraph benchmarks
158160
for runtime in self.enabled_runtimes():
@@ -312,11 +314,20 @@ def teardown(self):
312314

313315

314316
class SubmitKernel(ComputeBenchmark):
315-
def __init__(self, bench, runtime: RUNTIMES, ioq, MeasureCompletion=0, UseEvents=0):
317+
def __init__(
318+
self,
319+
bench,
320+
runtime: RUNTIMES,
321+
ioq,
322+
MeasureCompletion=0,
323+
UseEvents=0,
324+
KernelExecTime=1,
325+
):
316326
self.ioq = ioq
317327
self.runtime = runtime
318328
self.MeasureCompletion = MeasureCompletion
319329
self.UseEvents = UseEvents
330+
self.KernelExecTime = KernelExecTime
320331
super().__init__(
321332
bench, f"api_overhead_benchmark_{runtime.value}", "SubmitKernel"
322333
)
@@ -332,7 +343,11 @@ def name(self):
332343
# to match the existing already stored results
333344
events_str = " not using events" if not self.UseEvents else ""
334345

335-
return f"api_overhead_benchmark_{self.runtime.value} SubmitKernel {order}{completion_str}{events_str}"
346+
kernel_exec_time_str = (
347+
f" KernelExecTime=self.KernelExecTime" if self.KernelExecTime != 1 else ""
348+
)
349+
350+
return f"api_overhead_benchmark_{self.runtime.value} SubmitKernel {order}{completion_str}{events_str}{kernel_exec_time_str}"
336351

337352
def explicit_group(self):
338353
order = "In Order" if self.ioq else "Out Of Order"
@@ -342,7 +357,11 @@ def explicit_group(self):
342357
# to match the existing already stored results
343358
events_str = " not using events" if not self.UseEvents else ""
344359

345-
return f"SubmitKernel {order}{completion_str}{events_str}"
360+
kernel_exec_time_str = (
361+
f" KernelExecTime=self.KernelExecTime" if self.KernelExecTime != 1 else ""
362+
)
363+
364+
return f"SubmitKernel {order}{completion_str}{events_str}{kernel_exec_time_str}"
346365

347366
def description(self) -> str:
348367
order = "in-order" if self.ioq else "out-of-order"
@@ -355,6 +374,7 @@ def description(self) -> str:
355374
return (
356375
f"Measures CPU time overhead of submitting {order} kernels through {runtime_name} API{completion_desc}. "
357376
f"Runs 10 simple kernels with minimal execution time to isolate API overhead from kernel execution time."
377+
f"Each kernel exutes for approximately {self.KernelExecTime} micro seconds."
358378
)
359379

360380
def range(self) -> tuple[float, float]:
@@ -367,7 +387,7 @@ def bin_args(self) -> list[str]:
367387
"--iterations=100000",
368388
"--Profiling=0",
369389
"--NumKernels=10",
370-
"--KernelExecTime=1",
390+
f"--KernelExecTime={self.KernelExecTime}",
371391
f"--UseEvents={self.UseEvents}",
372392
]
373393

0 commit comments

Comments
 (0)