[llvm] [AArch64] Set scalar fneg to free for fnmul (PR #104814)

Mon Aug 19 09:32:15 PDT 2024

https://github.com/davemgreen created https://github.com/llvm/llvm-project/pull/104814

A scalar fneg(fmul(..)) or fmul(fneg(..)) can be folded into a single fnmul instruction on AArch64. Example: https://clang.godbolt.org/z/znPj34Mae

This patch treats the fneg in such patterns as free, i.e. it is given a cost of 0.

>From e3c0132703ba744d738a7e0332a32bed14e05744 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Mon, 19 Aug 2024 16:20:03 +0100
Subject: [PATCH] [AArch64] Set scalar fneg to free for fnmul

A fneg(fmul(..)) or fmul(fneg(..)) can be folded into a fnmul under AArch64.
https://clang.godbolt.org/z/znPj34Mae

This discounts the cost of the fneg in such patterns to be free.
---
 .../Target/AArch64/AArch64TargetTransformInfo.cpp    |  9 +++++++++
 llvm/test/Analysis/CostModel/AArch64/arith-fp.ll     | 12 ++++++------
 2 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index a782c9c4351237..f31e1fa9ab3045 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -3242,6 +3242,15 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost(
     return LT.first;
 
   case ISD::FNEG:
+    // Scalar fmul(fneg) or fneg(fmul) can be converted to fnmul
+    if ((Ty->isFloatTy() || Ty->isDoubleTy() ||
+         (Ty->isHalfTy() && ST->hasFullFP16())) &&
+        CxtI &&
+        ((CxtI->hasOneUse() &&
+          match(*CxtI->user_begin(), m_FMul(m_Value(), m_Value()))) ||
+         match(CxtI->getOperand(0), m_FMul(m_Value(), m_Value()))))
+      return 0;
+    [[fallthrough]];
   case ISD::FADD:
   case ISD::FSUB:
     // Increase the cost for half and bfloat types if not architecturally
diff --git a/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll b/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll
index 84150765d77973..aaffd97b92b2de 100644
--- a/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll
@@ -133,7 +133,7 @@ define i32 @fneg(i32 %arg) {
 
 define i32 @fmulfneg(i32 %arg) {
 ; CHECK-LABEL: 'fmulfneg'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F16 = fneg half undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %F16 = fneg half undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F16M = fmul half %F16, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F16 = fneg <2 x half> undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F16M = fmul <2 x half> %V2F16, undef
@@ -143,7 +143,7 @@ define i32 @fmulfneg(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F16M = fmul <8 x half> %V8F16, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16F16 = fneg <16 x half> undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F16M = fmul <16 x half> %V16F16, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fneg float undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %F32 = fneg float undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32M = fmul float %F32, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fneg <2 x float> undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F32M = fmul <2 x float> %V2F32, undef
@@ -151,7 +151,7 @@ define i32 @fmulfneg(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32M = fmul <4 x float> %V4F32, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fneg <8 x float> undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32M = fmul <8 x float> %V8F32, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fneg double undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %F64 = fneg double undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64M = fmul double %F64, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fneg <2 x double> undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64M = fmul <2 x double> %V2F64, undef
@@ -192,7 +192,7 @@ define i32 @fmulfneg(i32 %arg) {
 define i32 @fnegfmul(i32 %arg) {
 ; CHECK-LABEL: 'fnegfmul'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F16M = fmul half undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F16 = fneg half %F16M
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %F16 = fneg half %F16M
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F16M = fmul <2 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F16 = fneg <2 x half> %V2F16M
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F16M = fmul <4 x half> undef, undef
@@ -202,7 +202,7 @@ define i32 @fnegfmul(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F16M = fmul <16 x half> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16F16 = fneg <16 x half> %V16F16M
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32M = fmul float undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fneg float %F32M
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %F32 = fneg float %F32M
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F32M = fmul <2 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fneg <2 x float> %V2F32M
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32M = fmul <4 x float> undef, undef
@@ -210,7 +210,7 @@ define i32 @fnegfmul(i32 %arg) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32M = fmul <8 x float> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fneg <8 x float> %V8F32M
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64M = fmul double undef, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fneg double %F64M
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %F64 = fneg double %F64M
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64M = fmul <2 x double> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fneg <2 x double> %V2F64M
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64M = fmul <4 x double> undef, undef