@@ -144,15 +144,17 @@ def benchmarks(self) -> list[Benchmark]:
144
144
for in_order_queue in [0 , 1 ]:
145
145
for measure_completion in [0 , 1 ]:
146
146
for use_events in [0 , 1 ]:
147
- benches .append (
148
- SubmitKernel (
149
- self ,
150
- runtime ,
151
- in_order_queue ,
152
- measure_completion ,
153
- use_events ,
147
+ for kernel_exec_time in [1 , 20 ]:
148
+ benches .append (
149
+ SubmitKernel (
150
+ self ,
151
+ runtime ,
152
+ in_order_queue ,
153
+ measure_completion ,
154
+ use_events ,
155
+ kernel_exec_time
156
+ )
154
157
)
155
- )
156
158
157
159
# Add SinKernelGraph benchmarks
158
160
for runtime in self .enabled_runtimes ():
@@ -312,11 +314,12 @@ def teardown(self):
312
314
313
315
314
316
class SubmitKernel (ComputeBenchmark ):
315
- def __init__ (self , bench , runtime : RUNTIMES , ioq , MeasureCompletion = 0 , UseEvents = 0 ):
317
+ def __init__ (self , bench , runtime : RUNTIMES , ioq , MeasureCompletion = 0 , UseEvents = 0 , KernelExecTime = 1 ):
316
318
self .ioq = ioq
317
319
self .runtime = runtime
318
320
self .MeasureCompletion = MeasureCompletion
319
321
self .UseEvents = UseEvents
322
+ self .KernelExecTime = KernelExecTime
320
323
super ().__init__ (
321
324
bench , f"api_overhead_benchmark_{ runtime .value } " , "SubmitKernel"
322
325
)
@@ -332,7 +335,9 @@ def name(self):
332
335
# to match the existing already stored results
333
336
events_str = " not using events" if not self .UseEvents else ""
334
337
335
- return f"api_overhead_benchmark_{ self .runtime .value } SubmitKernel { order } { completion_str } { events_str } "
338
+ # Only a non-default execution time (!= 1) adds a suffix; interpolate the value itself.
+ kernel_exec_time_str = f" KernelExecTime={self.KernelExecTime}" if self.KernelExecTime != 1 else ""
339
+
340
+ return f"api_overhead_benchmark_{ self .runtime .value } SubmitKernel { order } { completion_str } { events_str } { kernel_exec_time_str } "
336
341
337
342
def explicit_group (self ):
338
343
order = "In Order" if self .ioq else "Out Of Order"
@@ -342,7 +347,9 @@ def explicit_group(self):
342
347
# to match the existing already stored results
343
348
events_str = " not using events" if not self .UseEvents else ""
344
349
345
- return f"SubmitKernel { order } { completion_str } { events_str } "
350
+ # Only a non-default execution time (!= 1) adds a suffix; interpolate the value itself.
+ kernel_exec_time_str = f" KernelExecTime={self.KernelExecTime}" if self.KernelExecTime != 1 else ""
351
+
352
+ return f"SubmitKernel { order } { completion_str } { events_str } { kernel_exec_time_str } "
346
353
347
354
def description (self ) -> str :
348
355
order = "in-order" if self .ioq else "out-of-order"
@@ -355,6 +362,7 @@ def description(self) -> str:
355
362
return (
356
363
f"Measures CPU time overhead of submitting { order } kernels through { runtime_name } API{ completion_desc } . "
357
364
f"Runs 10 simple kernels with minimal execution time to isolate API overhead from kernel execution time."
365
+ f" Each kernel executes for approximately {self.KernelExecTime} microseconds."
358
366
)
359
367
360
368
def range (self ) -> tuple [float , float ]:
@@ -367,7 +375,7 @@ def bin_args(self) -> list[str]:
367
375
"--iterations=100000" ,
368
376
"--Profiling=0" ,
369
377
"--NumKernels=10" ,
370
- "--KernelExecTime=1 " ,
378
+ f"--KernelExecTime={self.KernelExecTime}",
371
379
f"--UseEvents={ self .UseEvents } " ,
372
380
]
373
381
0 commit comments