[llvm] c61d565 - [AArch64] Set scalar fneg to free for fnmul (#104814)

Wed Aug 21 10:10:20 PDT 2024

Author: David Green
Date: 2024-08-21T18:10:16+01:00
New Revision: c61d565721d0cf03e2658ec65a3526dd89142e52

URL: https://github.com/llvm/llvm-project/commit/c61d565721d0cf03e2658ec65a3526dd89142e52
DIFF: https://github.com/llvm/llvm-project/commit/c61d565721d0cf03e2658ec65a3526dd89142e52.diff

LOG: [AArch64] Set scalar fneg to free for fnmul (#104814)

A scalar fneg(fmul(..)) or fmul(fneg(..)) can be folded into an fnmul on
AArch64. https://clang.godbolt.org/z/znPj34Mae

This makes the cost model treat the fneg in such patterns as free.

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
    llvm/test/Analysis/CostModel/AArch64/arith-fp.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index a782c9c4351237..f31e1fa9ab3045 100644

--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -3242,6 +3242,15 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost(
     return LT.first;
 
   case ISD::FNEG:
+    // Scalar fmul(fneg) or fneg(fmul) can be converted to fnmul
+    if ((Ty->isFloatTy() || Ty->isDoubleTy() ||
+         (Ty->isHalfTy() && ST->hasFullFP16())) &&
+        CxtI &&
+        ((CxtI->hasOneUse() &&
+          match(*CxtI->user_begin(), m_FMul(m_Value(), m_Value()))) ||
+         match(CxtI->getOperand(0), m_FMul(m_Value(), m_Value()))))
+      return 0;
+    [[fallthrough]];
   case ISD::FADD:
   case ISD::FSUB:
     // Increase the cost for half and bfloat types if not architecturally

diff  --git a/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll b/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll
index 84150765d77973..aaffd97b92b2de 100644
--- a/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll
@@ -133,7 +133,7 @@ define i32 @fneg(i32 %arg) {
 
 define i32 @fmulfneg(i32 %arg) {
 ; CHECK-LABEL: 'fmulfneg'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F16 = fneg half undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %F16 = fneg half undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F16M = fmul half %F16, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F16 = fneg <2 x half> undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F16M = fmul <2 x half> %V2F16, undef
@@ -143,7 +143,7 @@ define i32 @fmulfneg(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F16M = fmul <8 x half> %V8F16, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16F16 = fneg <16 x half> undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F16M = fmul <16 x half> %V16F16, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fneg float undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %F32 = fneg float undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32M = fmul float %F32, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fneg <2 x float> undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F32M = fmul <2 x float> %V2F32, undef
@@ -151,7 +151,7 @@ define i32 @fmulfneg(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32M = fmul <4 x float> %V4F32, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fneg <8 x float> undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32M = fmul <8 x float> %V8F32, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fneg double undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %F64 = fneg double undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64M = fmul double %F64, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fneg <2 x double> undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64M = fmul <2 x double> %V2F64, undef
@@ -192,7 +192,7 @@ define i32 @fmulfneg(i32 %arg) {
 define i32 @fnegfmul(i32 %arg) {
 ; CHECK-LABEL: 'fnegfmul'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F16M = fmul half undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F16 = fneg half %F16M
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %F16 = fneg half %F16M
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F16M = fmul <2 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F16 = fneg <2 x half> %V2F16M
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F16M = fmul <4 x half> undef, undef
@@ -202,7 +202,7 @@ define i32 @fnegfmul(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F16M = fmul <16 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16F16 = fneg <16 x half> %V16F16M
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32M = fmul float undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fneg float %F32M
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %F32 = fneg float %F32M
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F32M = fmul <2 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fneg <2 x float> %V2F32M
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32M = fmul <4 x float> undef, undef
@@ -210,7 +210,7 @@ define i32 @fnegfmul(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32M = fmul <8 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fneg <8 x float> %V8F32M
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64M = fmul double undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fneg double %F64M
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %F64 = fneg double %F64M
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64M = fmul <2 x double> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fneg <2 x double> %V2F64M
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64M = fmul <4 x double> undef, undef