[llvm] e2490b7 - [AMDGPU] New test case where it is better not to form FMA

Jay Foad via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 1 05:48:24 PDT 2023


Author: Jay Foad
Date: 2023-06-01T13:44:15+01:00
New Revision: e2490b73914e4ae5a0dab78d36a9ffdd5bc6e98b

URL: https://github.com/llvm/llvm-project/commit/e2490b73914e4ae5a0dab78d36a9ffdd5bc6e98b
DIFF: https://github.com/llvm/llvm-project/commit/e2490b73914e4ae5a0dab78d36a9ffdd5bc6e98b.diff

LOG: [AMDGPU] New test case where it is better not to form FMA

Added: 
    

Modified: 
    llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll

Removed: 
    


################################################################################
diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll
index b63d7081e748a..485b3790a9cb1 100644
--- a/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll
+++ b/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll
@@ -257,6 +257,23 @@ define amdgpu_ps float @fmac_sequence_innermost_fmul_multiple_use(float inreg %a
   ret float %t7
 }
 
+; "fmul %m, 2.0" could select to an FMA instruction, but it is no better than
+; selecting it as a multiply. In some cases the multiply is better because
+; SIFoldOperands can fold it into a previous instruction as an output modifier.
+define amdgpu_ps float @fma_vs_output_modifier(float %x, i32 %n) #0 {
+; GCN-LABEL: fma_vs_output_modifier:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    v_cvt_f32_i32_e64 v1, v1 mul:2
+; GCN-NEXT:    v_mul_f32_e32 v0, v0, v0
+; GCN-NEXT:    v_mul_f32_e32 v0, v0, v1
+; GCN-NEXT:    ; return to shader part epilog
+  %s = sitofp i32 %n to float ; s = signed i32 %n converted to float
+  %m = fmul contract float %x, %x ; m = x * x
+  %a = fmul contract float %m, 2.0 ; a = m * 2.0: the multiply that the comment before this function is about
+  %r = fmul reassoc nsz float %a, %s ; r = a * s; the checks above expect mul:2 on the v_cvt (presumably this x2, moved onto %s via reassoc)
+  ret float %r
+}
+
 ; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
 declare float @llvm.maxnum.f32(float, float) #1
 


        


More information about the llvm-commits mailing list