[llvm] [AMDGPU] Implement IR variant of isFMAFasterThanFMulAndFAdd (PR #121465)
Chinmay Deshpande via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 7 04:42:27 PST 2025
https://github.com/chinmaydd updated https://github.com/llvm/llvm-project/pull/121465
>From b4de8eccd3d8b4b2eb04a16f95042296f363e8b5 Mon Sep 17 00:00:00 2001
From: Chinmay Deshpande <ChinmayDiwakar.Deshpande at amd.com>
Date: Tue, 7 Jan 2025 07:41:54 -0500
Subject: [PATCH] [AMDGPU] Implement IR variant of isFMAFasterThanFMulAndFAdd
Change-Id: I4e515a1ca6c792500ea8a946e17dc6145e0ecedc
---
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 56 +++++++++++++++++++++++
llvm/lib/Target/AMDGPU/SIISelLowering.h | 3 ++
2 files changed, 59 insertions(+)
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index b3cfa398d9b5f6..170e9022c7a03e 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -5731,6 +5731,35 @@ bool SITargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
return false;
}
+// IR-level counterpart of the MIR variant of isFMAFasterThanFMulAndFAdd;
+// refer to the comments on that variant for specific details.
+bool SITargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
+                                                  Type *Ty) const {
+  const unsigned ScalarBits = Ty->getScalarSizeInBits();
+  if (ScalarBits == 64)
+    return true;
+
+  if (ScalarBits == 16) {
+    const SIModeRegisterDefaults Mode(F, *Subtarget);
+    const bool FlushesF16 =
+        Mode.FP64FP16Denormals == DenormalMode::getPreserveSign();
+    return Subtarget->has16BitInsts() && !FlushesF16;
+  }
+
+  if (ScalarBits != 32)
+    return false;
+
+  const bool FastFMA = Subtarget->hasFastFMAF32();
+  if (!Subtarget->hasMadMacF32Insts())
+    return FastFMA;
+
+  const bool HasDL = Subtarget->hasDLInsts();
+  const SIModeRegisterDefaults Mode(F, *Subtarget);
+  if (Mode.FP32Denormals != DenormalMode::getPreserveSign())
+    return FastFMA || HasDL;
+  return FastFMA && HasDL;
+}
+
bool SITargetLowering::isFMADLegal(const MachineInstr &MI, LLT Ty) const {
if (!Ty.isScalar())
return false;
@@ -16945,6 +16974,33 @@ bool SITargetLowering::checkForPhysRegDependency(
return false;
}
+/// Check if it is profitable to hoist instruction \p I in then/else to if.
+/// Returns false only when \p I is a single-use fmul whose fadd/fsub user may
+/// be contracted into an FMA that is faster than the separate operations.
+bool SITargetLowering::isProfitableToHoist(Instruction *I) const {
+  if (!I->hasOneUse())
+    return true;
+
+  // TODO: Add more patterns that are not profitable to hoist
+  switch (I->getOpcode()) {
+  case Instruction::FMul: {
+    const Instruction *User = I->user_back();
+    const unsigned UserOpc = User->getOpcode();
+    if (UserOpc != Instruction::FAdd && UserOpc != Instruction::FSub)
+      return true;
+    // Fusion is permitted by contract flags on both instructions or globally.
+    const TargetOptions &Options = getTargetMachine().Options;
+    const bool MayFuse = (I->hasAllowContract() && User->hasAllowContract()) ||
+                         Options.AllowFPOpFusion == FPOpFusion::Fast ||
+                         Options.UnsafeFPMath;
+    return !MayFuse ||
+           !isFMAFasterThanFMulAndFAdd(*I->getFunction(), User->getType());
+  }
+  default:
+    return true;
+  }
+}
+
void SITargetLowering::emitExpandAtomicAddrSpacePredicate(
Instruction *AI) const {
// Given: atomicrmw fadd ptr %addr, float %val ordering
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index f4641e7a659907..d7bb03f25fc02c 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -456,6 +456,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
EVT VT) const override;
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
const LLT Ty) const override;
+ bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override;
bool isFMADLegal(const SelectionDAG &DAG, const SDNode *N) const override;
bool isFMADLegal(const MachineInstr &MI, const LLT Ty) const override;
@@ -535,6 +536,8 @@ class SITargetLowering final : public AMDGPUTargetLowering {
const TargetInstrInfo *TII, unsigned &PhysReg,
int &Cost) const override;
+ bool isProfitableToHoist(Instruction *I) const override;
+
bool isKnownNeverNaNForTargetNode(SDValue Op,
const SelectionDAG &DAG,
bool SNaN = false,
More information about the llvm-commits
mailing list