[llvm] [llvm][AMDGPU] Implemented isProfitableToHoist and isFMAFasterThanFMulAndFAdd (PR #108756)

via llvm-commits llvm-commits at lists.llvm.org
Sun Sep 15 06:57:41 PDT 2024


https://github.com/braw-lee created https://github.com/llvm/llvm-project/pull/108756

Fixes issue #108751 

>From 92d4337a745d6d8ee0a3d600a5a2d5f81dfec29c Mon Sep 17 00:00:00 2001
From: Kushal Pal <kushalpal109 at gmail.com>
Date: Sun, 15 Sep 2024 19:23:22 +0530
Subject: [PATCH] [llvm][AMDGPU] Implemented isProfitableToHoist and
 isFMAFasterThanFMulAndFAdd

Signed-off-by: Kushal Pal <kushalpal109 at gmail.com>
---
 llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 58 +++++++++++++++++++
 llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h   |  9 +++
 2 files changed, 67 insertions(+)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 81852f6a130584..4cbb3898f4187b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -891,6 +891,37 @@ bool AMDGPUTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
   return true;
 }
 
+bool AMDGPUTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
+                                                      EVT VT) const {
+  // Judge vectors by element type; extended (non-MVT) types never qualify.
+  const EVT EltVT = VT.getScalarType();
+  if (!EltVT.isSimple())
+    return false;
+
+  switch (EltVT.getSimpleVT().SimpleTy) {
+  case MVT::f16:
+    // NOTE(review): hasFullFP16() looks like an AArch64 subtarget query;
+    // confirm that the AMDGPU subtarget actually provides it.
+    return Subtarget->hasFullFP16();
+  case MVT::f32:
+  case MVT::f64:
+    return true;
+  default:
+    return false;
+  }
+}
+
+bool AMDGPUTargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
+                                                      Type *Ty) const {
+  // IR-level form of the query. Only the scalar (element) type is inspected:
+  // float and double answer true, every other type ID (half included)
+  // answers false.
+  const Type::TypeID ElemID = Ty->getScalarType()->getTypeID();
+  const bool IsF32OrF64 =
+      ElemID == Type::FloatTyID || ElemID == Type::DoubleTyID;
+  return IsF32OrF64;
+}
+
 bool AMDGPUTargetLowering::isSDNodeAlwaysUniform(const SDNode *N) const {
   switch (N->getOpcode()) {
   case ISD::EntryToken:
@@ -1000,6 +1031,33 @@ bool AMDGPUTargetLowering::isTruncateFree(Type *Source, Type *Dest) const {
   return DestSize < SrcSize && DestSize % 32 == 0;
 }
 
+/// Decide whether \p I may be hoisted out of a then/else block into the
+/// enclosing if. Hoisting is rejected only for an fmul whose single user is an
+/// fadd/fsub that could be fused with it into an FMA; everything else is fine.
+bool AMDGPUTargetLowering::isProfitableToHoist(Instruction *I) const {
+  // Only a single-use fmul can be one half of a fusable pair.
+  if (I->getOpcode() != Instruction::FMul || !I->hasOneUse())
+    return true;
+
+  // The sole user has to be the add/sub that would absorb the multiply.
+  Instruction *OnlyUser = I->user_back();
+  const unsigned UserOpc = OnlyUser->getOpcode();
+  if (UserOpc != Instruction::FSub && UserOpc != Instruction::FAdd)
+    return true;
+
+  const Function *ParentFn = I->getFunction();
+  Type *OperandTy = OnlyUser->getOperand(0)->getType();
+  // Fusion needs a profitable FMA for this type, a legal or custom FMA node,
+  // and FP options that allow contracting the two operations.
+  if (!isFMAFasterThanFMulAndFAdd(*ParentFn, OperandTy))
+    return true;
+  const DataLayout &DL = ParentFn->getDataLayout();
+  if (!isOperationLegalOrCustom(ISD::FMA, getValueType(DL, OperandTy)))
+    return true;
+  const TargetOptions &Opts = getTargetMachine().Options;
+  return Opts.AllowFPOpFusion != FPOpFusion::Fast && !Opts.UnsafeFPMath;
+}
+
 bool AMDGPUTargetLowering::isZExtFree(Type *Src, Type *Dest) const {
   unsigned SrcSize = Src->getScalarSizeInBits();
   unsigned DestSize = Dest->getScalarSizeInBits();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 18b5c388f32932..9e5b23d0126ab1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -193,6 +193,8 @@ class AMDGPUTargetLowering : public TargetLowering {
   bool isTruncateFree(EVT Src, EVT Dest) const override;
   bool isTruncateFree(Type *Src, Type *Dest) const override;
 
+  bool isProfitableToHoist(Instruction *I) const override;
+
   bool isZExtFree(Type *Src, Type *Dest) const override;
   bool isZExtFree(EVT Src, EVT Dest) const override;
 
@@ -229,6 +231,13 @@ class AMDGPUTargetLowering : public TargetLowering {
   bool isCheapToSpeculateCttz(Type *Ty) const override;
   bool isCheapToSpeculateCtlz(Type *Ty) const override;
 
+  /// Return true if an FMA operation is faster than a pair of fmul and fadd
+  /// instructions. fmuladd intrinsics will be expanded to FMAs when this method
+  /// returns true, otherwise fmuladd is expanded to fmul + fadd.
+  bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
+                                  EVT VT) const override;
+  bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override;
+
   bool isSDNodeAlwaysUniform(const SDNode *N) const override;
 
   // FIXME: This hook should not exist



More information about the llvm-commits mailing list