Skip to content

Commit f5b2d24

Browse files
committed
[NVPTX] Move roundeven on bf16 into the "sm_90 has it but sm_80 doesn't" bucket
1 parent 0e63672 commit f5b2d24

File tree

2 files changed

+13
-0
lines changed

2 files changed

+13
-0
lines changed

llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -447,6 +447,7 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
447447
case ISD::FFLOOR:
448448
case ISD::FNEARBYINT:
449449
case ISD::FRINT:
450+
case ISD::FROUNDEVEN:
450451
case ISD::FTRUNC:
451452
IsOpSupported = STI.getSmVersion() >= 90 && STI.getPTXVersion() >= 78;
452453
break;

llvm/test/CodeGen/NVPTX/bf16-instructions.ll

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -330,3 +330,15 @@ define bfloat @test_uitofp_i64(i64 %a) {
330330
%r = uitofp i64 %a to bfloat
331331
ret bfloat %r
332332
}
333+
334+
; CHECK-LABEL: test_roundeven(
335+
; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_roundeven_param_0];
336+
; SM80: cvt.rni.f32.f32 [[F:%f[0-9]+]]
337+
; SM80: cvt.rn.bf16.f32 [[R:%rs[0-9]+]], [[F]];
338+
; SM90: cvt.rni.bf16.bf16 [[R:%rs[0-9]+]], [[A]];
339+
; CHECK: st.param.b16 [func_retval0+0], [[R]];
340+
; CHECK: ret;
341+
define bfloat @test_roundeven(bfloat %a) {
342+
%r = call bfloat @llvm.roundeven.bf16(bfloat %a)
343+
ret bfloat %r
344+
}

0 commit comments

Comments
 (0)