[llvm] [AArch64] Set scalar fneg to free for fnmul (PR #104814)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 19 09:32:15 PDT 2024
https://github.com/davemgreen created https://github.com/llvm/llvm-project/pull/104814
An fneg(fmul(..)) or fmul(fneg(..)) can be folded into an fnmul on AArch64. https://clang.godbolt.org/z/znPj34Mae
This patch treats the scalar fneg in such patterns as free (cost 0) in the cost model.
From e3c0132703ba744d738a7e0332a32bed14e05744 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Mon, 19 Aug 2024 16:20:03 +0100
Subject: [PATCH] [AArch64] Set scalar fneg to free for fnmul
A fneg(fmul(..)) or fmul(fneg(..)) can be folded into a fnmul under AArch64.
https://clang.godbolt.org/z/znPj34Mae
This discounts the cost of the fneg in such patterns to be free.
---
.../Target/AArch64/AArch64TargetTransformInfo.cpp | 9 +++++++++
llvm/test/Analysis/CostModel/AArch64/arith-fp.ll | 12 ++++++------
2 files changed, 15 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index a782c9c4351237..f31e1fa9ab3045 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -3242,6 +3242,15 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost(
return LT.first;
case ISD::FNEG:
+ // Scalar fmul(fneg) or fneg(fmul) can be converted to fnmul
+ if ((Ty->isFloatTy() || Ty->isDoubleTy() ||
+ (Ty->isHalfTy() && ST->hasFullFP16())) &&
+ CxtI &&
+ ((CxtI->hasOneUse() &&
+ match(*CxtI->user_begin(), m_FMul(m_Value(), m_Value()))) ||
+ match(CxtI->getOperand(0), m_FMul(m_Value(), m_Value()))))
+ return 0;
+ [[fallthrough]];
case ISD::FADD:
case ISD::FSUB:
// Increase the cost for half and bfloat types if not architecturally
diff --git a/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll b/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll
index 84150765d77973..aaffd97b92b2de 100644
--- a/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll
@@ -133,7 +133,7 @@ define i32 @fneg(i32 %arg) {
define i32 @fmulfneg(i32 %arg) {
; CHECK-LABEL: 'fmulfneg'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F16 = fneg half undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %F16 = fneg half undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16M = fmul half %F16, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F16 = fneg <2 x half> undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16M = fmul <2 x half> %V2F16, undef
@@ -143,7 +143,7 @@ define i32 @fmulfneg(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F16M = fmul <8 x half> %V8F16, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F16 = fneg <16 x half> undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16M = fmul <16 x half> %V16F16, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fneg float undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %F32 = fneg float undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32M = fmul float %F32, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fneg <2 x float> undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32M = fmul <2 x float> %V2F32, undef
@@ -151,7 +151,7 @@ define i32 @fmulfneg(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32M = fmul <4 x float> %V4F32, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fneg <8 x float> undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32M = fmul <8 x float> %V8F32, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fneg double undef
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %F64 = fneg double undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64M = fmul double %F64, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fneg <2 x double> undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64M = fmul <2 x double> %V2F64, undef
@@ -192,7 +192,7 @@ define i32 @fmulfneg(i32 %arg) {
define i32 @fnegfmul(i32 %arg) {
; CHECK-LABEL: 'fnegfmul'
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16M = fmul half undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F16 = fneg half %F16M
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %F16 = fneg half %F16M
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F16M = fmul <2 x half> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F16 = fneg <2 x half> %V2F16M
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F16M = fmul <4 x half> undef, undef
@@ -202,7 +202,7 @@ define i32 @fnegfmul(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F16M = fmul <16 x half> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F16 = fneg <16 x half> %V16F16M
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32M = fmul float undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fneg float %F32M
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %F32 = fneg float %F32M
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32M = fmul <2 x float> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fneg <2 x float> %V2F32M
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32M = fmul <4 x float> undef, undef
@@ -210,7 +210,7 @@ define i32 @fnegfmul(i32 %arg) {
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32M = fmul <8 x float> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fneg <8 x float> %V8F32M
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64M = fmul double undef, undef
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fneg double %F64M
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %F64 = fneg double %F64M
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64M = fmul <2 x double> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fneg <2 x double> %V2F64M
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64M = fmul <4 x double> undef, undef
More information about the llvm-commits mailing list