@@ -144,15 +144,17 @@ def benchmarks(self) -> list[Benchmark]:
144
144
for in_order_queue in [0 , 1 ]:
145
145
for measure_completion in [0 , 1 ]:
146
146
for use_events in [0 , 1 ]:
147
- benches .append (
148
- SubmitKernel (
149
- self ,
150
- runtime ,
151
- in_order_queue ,
152
- measure_completion ,
153
- use_events ,
147
+ for kernel_exec_time in [1 , 20 ]:
148
+ benches .append (
149
+ SubmitKernel (
150
+ self ,
151
+ runtime ,
152
+ in_order_queue ,
153
+ measure_completion ,
154
+ use_events ,
155
+ kernel_exec_time ,
156
+ )
154
157
)
155
- )
156
158
157
159
# Add SinKernelGraph benchmarks
158
160
for runtime in self .enabled_runtimes ():
@@ -312,11 +314,20 @@ def teardown(self):
312
314
313
315
314
316
class SubmitKernel (ComputeBenchmark ):
315
- def __init__ (self , bench , runtime : RUNTIMES , ioq , MeasureCompletion = 0 , UseEvents = 0 ):
317
+ def __init__ (
318
+ self ,
319
+ bench ,
320
+ runtime : RUNTIMES ,
321
+ ioq ,
322
+ MeasureCompletion = 0 ,
323
+ UseEvents = 0 ,
324
+ KernelExecTime = 1 ,
325
+ ):
316
326
self .ioq = ioq
317
327
self .runtime = runtime
318
328
self .MeasureCompletion = MeasureCompletion
319
329
self .UseEvents = UseEvents
330
+ self .KernelExecTime = KernelExecTime
320
331
super ().__init__ (
321
332
bench , f"api_overhead_benchmark_{ runtime .value } " , "SubmitKernel"
322
333
)
@@ -332,7 +343,11 @@ def name(self):
332
343
# to match the existing already stored results
333
344
events_str = " not using events" if not self .UseEvents else ""
334
345
335
- return f"api_overhead_benchmark_{ self .runtime .value } SubmitKernel { order } { completion_str } { events_str } "
346
+ kernel_exec_time_str = (
347
+ f" KernelExecTime={self.KernelExecTime}" if self.KernelExecTime != 1 else ""
348
+ )
349
+
350
+ return f"api_overhead_benchmark_{ self .runtime .value } SubmitKernel { order } { completion_str } { events_str } { kernel_exec_time_str } "
336
351
337
352
def explicit_group (self ):
338
353
order = "In Order" if self .ioq else "Out Of Order"
@@ -342,7 +357,11 @@ def explicit_group(self):
342
357
# to match the existing already stored results
343
358
events_str = " not using events" if not self .UseEvents else ""
344
359
345
- return f"SubmitKernel { order } { completion_str } { events_str } "
360
+ kernel_exec_time_str = (
361
+ f" KernelExecTime={self.KernelExecTime}" if self.KernelExecTime != 1 else ""
362
+ )
363
+
364
+ return f"SubmitKernel { order } { completion_str } { events_str } { kernel_exec_time_str } "
346
365
347
366
def description (self ) -> str :
348
367
order = "in-order" if self .ioq else "out-of-order"
@@ -355,6 +374,7 @@ def description(self) -> str:
355
374
return (
356
375
f"Measures CPU time overhead of submitting { order } kernels through { runtime_name } API{ completion_desc } . "
357
376
f"Runs 10 simple kernels with minimal execution time to isolate API overhead from kernel execution time."
377
+ f" Each kernel executes for approximately {self.KernelExecTime} microseconds."
358
378
)
359
379
360
380
def range (self ) -> tuple [float , float ]:
@@ -367,7 +387,7 @@ def bin_args(self) -> list[str]:
367
387
"--iterations=100000" ,
368
388
"--Profiling=0" ,
369
389
"--NumKernels=10" ,
370
- "--KernelExecTime=1 " ,
390
+ f"--KernelExecTime={self.KernelExecTime}",
371
391
f"--UseEvents={ self .UseEvents } " ,
372
392
]
373
393
0 commit comments