[llvm] [NVPTX] Select bfloat16 add/mul/sub as fma on SM80 (PR #121065)

Thu Jan 9 14:34:59 PST 2025

================
@@ -42,17 +42,14 @@ define bfloat @test_fadd(bfloat %0, bfloat %1) {
 ;
 ; SM80-LABEL: test_fadd(
 ; SM80:       {
-; SM80-NEXT:    .reg .b16 %rs<4>;
-; SM80-NEXT:    .reg .f32 %f<4>;
+; SM80-NEXT:    .reg .b16 %rs<5>;
 ; SM80-EMPTY:
 ; SM80-NEXT:  // %bb.0:
 ; SM80-NEXT:    ld.param.b16 %rs1, [test_fadd_param_0];
 ; SM80-NEXT:    ld.param.b16 %rs2, [test_fadd_param_1];
-; SM80-NEXT:    cvt.f32.bf16 %f1, %rs2;
-; SM80-NEXT:    cvt.f32.bf16 %f2, %rs1;
-; SM80-NEXT:    add.rn.f32 %f3, %f2, %f1;
-; SM80-NEXT:    cvt.rn.bf16.f32 %rs3, %f3;
-; SM80-NEXT:    st.param.b16 [func_retval0], %rs3;
+; SM80-NEXT:    mov.b16 %rs3, 0x3F80;
+; SM80-NEXT:    fma.rn.bf16 %rs4, %rs1, %rs3, %rs2;
----------------
peterbell10 wrote:

Every test here has an `SM80-FTZ-NEXT` variant; those variants just don't show up in the diff because they haven't changed.

https://github.com/llvm/llvm-project/pull/121065