Skip to content

Commit de9236b

Browse files
committed
[Bench] add SubmitKernel benchmark variant with longer kernel
To test OOO queues
1 parent b8cc70c commit de9236b

File tree

1 file changed

+20
-12
lines changed

1 file changed

+20
-12
lines changed

devops/scripts/benchmarks/benches/compute.py

Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -144,15 +144,17 @@ def benchmarks(self) -> list[Benchmark]:
144144
for in_order_queue in [0, 1]:
145145
for measure_completion in [0, 1]:
146146
for use_events in [0, 1]:
147-
benches.append(
148-
SubmitKernel(
149-
self,
150-
runtime,
151-
in_order_queue,
152-
measure_completion,
153-
use_events,
147+
for kernel_exec_time in [1, 20]:
148+
benches.append(
149+
SubmitKernel(
150+
self,
151+
runtime,
152+
in_order_queue,
153+
measure_completion,
154+
use_events,
155+
kernel_exec_time
156+
)
154157
)
155-
)
156158

157159
# Add SinKernelGraph benchmarks
158160
for runtime in self.enabled_runtimes():
@@ -312,11 +314,12 @@ def teardown(self):
312314

313315

314316
class SubmitKernel(ComputeBenchmark):
315-
def __init__(self, bench, runtime: RUNTIMES, ioq, MeasureCompletion=0, UseEvents=0):
317+
def __init__(self, bench, runtime: RUNTIMES, ioq, MeasureCompletion=0, UseEvents=0, KernelExecTime=1):
316318
self.ioq = ioq
317319
self.runtime = runtime
318320
self.MeasureCompletion = MeasureCompletion
319321
self.UseEvents = UseEvents
322+
self.KernelExecTime = KernelExecTime
320323
super().__init__(
321324
bench, f"api_overhead_benchmark_{runtime.value}", "SubmitKernel"
322325
)
@@ -332,7 +335,9 @@ def name(self):
332335
# to match the existing already stored results
333336
events_str = " not using events" if not self.UseEvents else ""
334337

335-
return f"api_overhead_benchmark_{self.runtime.value} SubmitKernel {order}{completion_str}{events_str}"
338+
kernel_exec_time_str = f" KernelExecTime=self.KernelExecTime" if self.KernelExecTime != 1 else ""
339+
340+
return f"api_overhead_benchmark_{self.runtime.value} SubmitKernel {order}{completion_str}{events_str}{kernel_exec_time_str}"
336341

337342
def explicit_group(self):
338343
order = "In Order" if self.ioq else "Out Of Order"
@@ -342,7 +347,9 @@ def explicit_group(self):
342347
# to match the existing already stored results
343348
events_str = " not using events" if not self.UseEvents else ""
344349

345-
return f"SubmitKernel {order}{completion_str}{events_str}"
350+
kernel_exec_time_str = f" KernelExecTime=self.KernelExecTime" if self.KernelExecTime != 1 else ""
351+
352+
return f"SubmitKernel {order}{completion_str}{events_str}{kernel_exec_time_str}"
346353

347354
def description(self) -> str:
348355
order = "in-order" if self.ioq else "out-of-order"
@@ -355,6 +362,7 @@ def description(self) -> str:
355362
return (
356363
f"Measures CPU time overhead of submitting {order} kernels through {runtime_name} API{completion_desc}. "
357364
f"Runs 10 simple kernels with minimal execution time to isolate API overhead from kernel execution time."
365+
f"Each kernel exutes for approximately {self.KernelExecTime} micro seconds."
358366
)
359367

360368
def range(self) -> tuple[float, float]:
@@ -367,7 +375,7 @@ def bin_args(self) -> list[str]:
367375
"--iterations=100000",
368376
"--Profiling=0",
369377
"--NumKernels=10",
370-
"--KernelExecTime=1",
378+
f"--KernelExecTime={self.KernelExecTime}",
371379
f"--UseEvents={self.UseEvents}",
372380
]
373381

0 commit comments

Comments
 (0)