[llvm] [AMDGPU] Implement IR variant of isFMAFasterThanFMulAndFAdd (PR #121465)
Chinmay Deshpande via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 2 02:12:15 PST 2025
https://github.com/chinmaydd created https://github.com/llvm/llvm-project/pull/121465
Fixes #108751. Thanks @Shoreshen for helping out with the test case.
>From d08abf2677d728ba2c0a2aa635bf2263ee5eaba2 Mon Sep 17 00:00:00 2001
From: Chinmay Deshpande <ChinmayDiwakar.Deshpande at amd.com>
Date: Thu, 12 Dec 2024 23:51:39 -0500
Subject: [PATCH] [AMDGPU] Implement IR variant of isFMAFasterThanFMulAndFAdd
Change-Id: I2484db303227da9aa53cc8842283c4ba6a332b3a
---
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 58 ++++++
llvm/lib/Target/AMDGPU/SIISelLowering.h | 3 +
.../AMDGPU/is-profitable-to-hoist-ir.ll | 185 ++++++++++++++++++
3 files changed, 246 insertions(+)
create mode 100644 llvm/test/CodeGen/AMDGPU/is-profitable-to-hoist-ir.ll
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 58b061f5c1af0d..e610f2627d2cd8 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -5728,6 +5728,33 @@ bool SITargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
return false;
}
+// Refer to comments added to the MIR variant of isFMAFasterThanFMulAndFAdd for
+// specific details.
+bool SITargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
+ Type *Ty) const {
+ SIModeRegisterDefaults Mode = SIModeRegisterDefaults(F, *Subtarget);
+ switch (Ty->getScalarSizeInBits()) {
+ case 32: {
+ if (!Subtarget->hasMadMacF32Insts())
+ return Subtarget->hasFastFMAF32();
+
+ if (Mode.FP32Denormals != DenormalMode::getPreserveSign())
+ return Subtarget->hasFastFMAF32() || Subtarget->hasDLInsts();
+
+ return Subtarget->hasFastFMAF32() && Subtarget->hasDLInsts();
+ }
+ case 64:
+ return true;
+ case 16:
+ return Subtarget->has16BitInsts() &&
+ Mode.FP64FP16Denormals != DenormalMode::getPreserveSign();
+ default:
+ break;
+ }
+
+ return false;
+}
+
bool SITargetLowering::isFMADLegal(const MachineInstr &MI, LLT Ty) const {
if (!Ty.isScalar())
return false;
@@ -16942,6 +16969,37 @@ bool SITargetLowering::checkForPhysRegDependency(
return false;
}
+/// Check if it is profitable to hoist instruction in then/else to if.
+/// Not profitable if I and its user can form an FMA instruction
+/// because we prefer FMSUB/FMADD.
+bool SITargetLowering::isProfitableToHoist(Instruction *I) const {
+ if (!I->hasOneUse())
+ return true;
+
+ Instruction *User = I->user_back();
+ // TODO: Add more patterns that are not profitable to hoist
+ switch (I->getOpcode()) {
+ case Instruction::FMul: {
+ if (User->getOpcode() != Instruction::FSub &&
+ User->getOpcode() != Instruction::FAdd)
+ return true;
+
+ const TargetOptions &Options = getTargetMachine().Options;
+ const Function *F = I->getFunction();
+ const DataLayout &DL = F->getDataLayout();
+ Type *Ty = User->getOperand(0)->getType();
+
+ return !isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) ||
+ (Options.AllowFPOpFusion != FPOpFusion::Fast &&
+ !Options.UnsafeFPMath) ||
+ !isFMAFasterThanFMulAndFAdd(*F, Ty);
+ }
+ default:
+ return true;
+ }
+ return true;
+}
+
void SITargetLowering::emitExpandAtomicAddrSpacePredicate(
Instruction *AI) const {
// Given: atomicrmw fadd ptr %addr, float %val ordering
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 631f26542bbe6d..731fb5d79a90d4 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -457,6 +457,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
EVT VT) const override;
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
const LLT Ty) const override;
+ bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override;
bool isFMADLegal(const SelectionDAG &DAG, const SDNode *N) const override;
bool isFMADLegal(const MachineInstr &MI, const LLT Ty) const override;
@@ -536,6 +537,8 @@ class SITargetLowering final : public AMDGPUTargetLowering {
const TargetInstrInfo *TII, unsigned &PhysReg,
int &Cost) const override;
+ bool isProfitableToHoist(Instruction *I) const override;
+
bool isKnownNeverNaNForTargetNode(SDValue Op,
const SelectionDAG &DAG,
bool SNaN = false,
diff --git a/llvm/test/CodeGen/AMDGPU/is-profitable-to-hoist-ir.ll b/llvm/test/CodeGen/AMDGPU/is-profitable-to-hoist-ir.ll
new file mode 100644
index 00000000000000..3c204fda38d458
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/is-profitable-to-hoist-ir.ll
@@ -0,0 +1,185 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=simplifycfg -hoist-common-insts=true -mtriple=amdgcn-- -mcpu=gfx1030 -fp-contract=fast < %s | FileCheck -check-prefix=GFX -check-prefix=GFX-FP-CONTRACT %s
+; RUN: opt -S -passes=simplifycfg -hoist-common-insts=true -mtriple=amdgcn-- -mcpu=gfx1030 -enable-unsafe-fp-math --denormal-fp-math=ieee < %s | FileCheck -check-prefix=GFX -check-prefix=GFX-UNSAFE-FP-IEEE %s
+; RUN: opt -S -passes=simplifycfg -hoist-common-insts=true -mtriple=amdgcn-- -mcpu=gfx1030 -enable-unsafe-fp-math --denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=GFX -check-prefix=GFX-UNSAFE-FP-PRESERVE %s
+
+define double @_branch(ptr dereferenceable(8) %x, ptr dereferenceable(8) %y, ptr dereferenceable(8) %a) #0 {
+; GFX-LABEL: define double @_branch(
+; GFX-SAME: ptr dereferenceable(8) [[X:%.*]], ptr dereferenceable(8) [[Y:%.*]], ptr dereferenceable(8) [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+; GFX-NEXT: [[ENTRY:.*:]]
+; GFX-NEXT: [[TMP0:%.*]] = load double, ptr [[Y]], align 8
+; GFX-NEXT: [[CMP:%.*]] = fcmp oeq double [[TMP0]], 0.000000e+00
+; GFX-NEXT: [[TMP1:%.*]] = load double, ptr [[X]], align 8
+; GFX-NEXT: [[TMP2:%.*]] = load double, ptr [[A]], align 8
+; GFX-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]]
+; GFX: [[COMMON_RET:.*]]:
+; GFX-NEXT: [[COMMON_RET_OP:%.*]] = phi double [ [[MUL:%.*]], %[[IF_THEN]] ], [ [[SUB:%.*]], %[[IF_ELSE]] ]
+; GFX-NEXT: ret double [[COMMON_RET_OP]]
+; GFX: [[IF_THEN]]:
+; GFX-NEXT: [[MUL]] = fmul fast double [[TMP1]], [[TMP2]]
+; GFX-NEXT: [[ADD:%.*]] = fadd fast double 1.000000e+00, [[MUL]]
+; GFX-NEXT: br label %[[COMMON_RET]]
+; GFX: [[IF_ELSE]]:
+; GFX-NEXT: [[MUL1:%.*]] = fmul fast double [[TMP1]], [[TMP2]]
+; GFX-NEXT: [[SUB]] = fsub fast double [[MUL1]], [[TMP0]]
+; GFX-NEXT: br label %[[COMMON_RET]]
+;
+entry:
+ %0 = load double, ptr %y, align 8
+ %cmp = fcmp oeq double %0, 0.000000e+00
+ %1 = load double, ptr %x, align 8
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then: ; preds = %entry
+ %2 = load double, ptr %a, align 8
+ %mul = fmul fast double %1, %2
+ %add = fadd fast double 1.000000e+00, %mul
+ ret double %mul
+
+if.else: ; preds = %entry
+ %3 = load double, ptr %a, align 8
+ %mul1 = fmul fast double %1, %3
+ %sub = fsub fast double %mul1, %0
+ ret double %sub
+}
+
+define float @_branch2(ptr dereferenceable(8) %x, ptr dereferenceable(8) %y, ptr dereferenceable(8) %a) #0 {
+; GFX-LABEL: define float @_branch2(
+; GFX-SAME: ptr dereferenceable(8) [[X:%.*]], ptr dereferenceable(8) [[Y:%.*]], ptr dereferenceable(8) [[A:%.*]]) #[[ATTR0]] {
+; GFX-NEXT: [[ENTRY:.*:]]
+; GFX-NEXT: [[TMP0:%.*]] = load float, ptr [[Y]], align 8
+; GFX-NEXT: [[CMP:%.*]] = fcmp oeq float [[TMP0]], 0.000000e+00
+; GFX-NEXT: [[TMP1:%.*]] = load float, ptr [[X]], align 8
+; GFX-NEXT: [[TMP2:%.*]] = load float, ptr [[A]], align 8
+; GFX-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]]
+; GFX: [[COMMON_RET:.*]]:
+; GFX-NEXT: [[COMMON_RET_OP:%.*]] = phi float [ [[MUL:%.*]], %[[IF_THEN]] ], [ [[SUB:%.*]], %[[IF_ELSE]] ]
+; GFX-NEXT: ret float [[COMMON_RET_OP]]
+; GFX: [[IF_THEN]]:
+; GFX-NEXT: [[MUL]] = fmul fast float [[TMP1]], [[TMP2]]
+; GFX-NEXT: [[ADD:%.*]] = fadd fast float 1.000000e+00, [[MUL]]
+; GFX-NEXT: br label %[[COMMON_RET]]
+; GFX: [[IF_ELSE]]:
+; GFX-NEXT: [[MUL1:%.*]] = fmul fast float [[TMP1]], [[TMP2]]
+; GFX-NEXT: [[SUB]] = fsub fast float [[MUL1]], [[TMP0]]
+; GFX-NEXT: br label %[[COMMON_RET]]
+;
+entry:
+ %0 = load float, ptr %y, align 8
+ %cmp = fcmp oeq float %0, 0.000000e+00
+ %1 = load float, ptr %x, align 8
+ br i1 %cmp, label %if.then, label %if.else
+
+
+if.then: ; preds = %entry
+ %2 = load float, ptr %a, align 8
+ %mul = fmul fast float %1, %2
+ %add = fadd fast float 1.000000e+00, %mul
+ ret float %mul
+
+if.else: ; preds = %entry
+ %3 = load float, ptr %a, align 8
+ %mul1 = fmul fast float %1, %3
+ %sub = fsub fast float %mul1, %0
+ ret float %sub
+}
+
+define half @_branch3(ptr dereferenceable(8) %x, ptr dereferenceable(8) %y, ptr dereferenceable(8) %a) #0 {
+; GFX-CONTRACT-LABEL: define half @_branch3(
+; GFX-CONTRACT-SAME: ptr dereferenceable(8) [[X:%.*]], ptr dereferenceable(8) [[Y:%.*]], ptr dereferenceable(8) [[A:%.*]]) #[[ATTR0]] {
+; GFX-CONTRACT-NEXT: [[ENTRY:.*:]]
+; GFX-CONTRACT-NEXT: [[TMP0:%.*]] = load half, ptr [[Y]], align 8
+; GFX-CONTRACT-NEXT: [[CMP:%.*]] = fcmp oeq half [[TMP0]], 0xH0000
+; GFX-CONTRACT-NEXT: [[TMP1:%.*]] = load half, ptr [[X]], align 8
+; GFX-CONTRACT-NEXT: [[TMP2:%.*]] = load half, ptr [[A]], align 8
+; GFX-CONTRACT-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]]
+; GFX-CONTRACT: [[COMMON_RET:.*]]:
+; GFX-CONTRACT-NEXT: [[COMMON_RET_OP:%.*]] = phi half [ [[MUL:%.*]], %[[IF_THEN]] ], [ [[SUB:%.*]], %[[IF_ELSE]] ]
+; GFX-CONTRACT-NEXT: ret half [[COMMON_RET_OP]]
+; GFX-CONTRACT: [[IF_THEN]]:
+; GFX-CONTRACT-NEXT: [[MUL]] = fmul fast half [[TMP1]], [[TMP2]]
+; GFX-CONTRACT-NEXT: [[ADD:%.*]] = fadd fast half 0xH3C00, [[MUL]]
+; GFX-CONTRACT-NEXT: br label %[[COMMON_RET]]
+; GFX-CONTRACT: [[IF_ELSE]]:
+; GFX-CONTRACT-NEXT: [[MUL1:%.*]] = fmul fast half [[TMP1]], [[TMP2]]
+; GFX-CONTRACT-NEXT: [[SUB]] = fsub fast half [[MUL1]], [[TMP0]]
+; GFX-CONTRACT-NEXT: br label %[[COMMON_RET]]
+;
+; GFX-FP-CONTRACT-LABEL: define half @_branch3(
+; GFX-FP-CONTRACT-SAME: ptr dereferenceable(8) [[X:%.*]], ptr dereferenceable(8) [[Y:%.*]], ptr dereferenceable(8) [[A:%.*]]) #[[ATTR0]] {
+; GFX-FP-CONTRACT-NEXT: [[ENTRY:.*:]]
+; GFX-FP-CONTRACT-NEXT: [[TMP0:%.*]] = load half, ptr [[Y]], align 8
+; GFX-FP-CONTRACT-NEXT: [[CMP:%.*]] = fcmp oeq half [[TMP0]], 0xH0000
+; GFX-FP-CONTRACT-NEXT: [[TMP1:%.*]] = load half, ptr [[X]], align 8
+; GFX-FP-CONTRACT-NEXT: [[TMP2:%.*]] = load half, ptr [[A]], align 8
+; GFX-FP-CONTRACT-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]]
+; GFX-FP-CONTRACT: [[COMMON_RET:.*]]:
+; GFX-FP-CONTRACT-NEXT: [[COMMON_RET_OP:%.*]] = phi half [ [[MUL:%.*]], %[[IF_THEN]] ], [ [[SUB:%.*]], %[[IF_ELSE]] ]
+; GFX-FP-CONTRACT-NEXT: ret half [[COMMON_RET_OP]]
+; GFX-FP-CONTRACT: [[IF_THEN]]:
+; GFX-FP-CONTRACT-NEXT: [[MUL]] = fmul fast half [[TMP1]], [[TMP2]]
+; GFX-FP-CONTRACT-NEXT: [[ADD:%.*]] = fadd fast half 0xH3C00, [[MUL]]
+; GFX-FP-CONTRACT-NEXT: br label %[[COMMON_RET]]
+; GFX-FP-CONTRACT: [[IF_ELSE]]:
+; GFX-FP-CONTRACT-NEXT: [[MUL1:%.*]] = fmul fast half [[TMP1]], [[TMP2]]
+; GFX-FP-CONTRACT-NEXT: [[SUB]] = fsub fast half [[MUL1]], [[TMP0]]
+; GFX-FP-CONTRACT-NEXT: br label %[[COMMON_RET]]
+;
+; GFX-UNSAFE-FP-IEEE-LABEL: define half @_branch3(
+; GFX-UNSAFE-FP-IEEE-SAME: ptr dereferenceable(8) [[X:%.*]], ptr dereferenceable(8) [[Y:%.*]], ptr dereferenceable(8) [[A:%.*]]) #[[ATTR0]] {
+; GFX-UNSAFE-FP-IEEE-NEXT: [[ENTRY:.*:]]
+; GFX-UNSAFE-FP-IEEE-NEXT: [[TMP0:%.*]] = load half, ptr [[Y]], align 8
+; GFX-UNSAFE-FP-IEEE-NEXT: [[CMP:%.*]] = fcmp oeq half [[TMP0]], 0xH0000
+; GFX-UNSAFE-FP-IEEE-NEXT: [[TMP1:%.*]] = load half, ptr [[X]], align 8
+; GFX-UNSAFE-FP-IEEE-NEXT: [[TMP2:%.*]] = load half, ptr [[A]], align 8
+; GFX-UNSAFE-FP-IEEE-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]]
+; GFX-UNSAFE-FP-IEEE: [[COMMON_RET:.*]]:
+; GFX-UNSAFE-FP-IEEE-NEXT: [[COMMON_RET_OP:%.*]] = phi half [ [[MUL:%.*]], %[[IF_THEN]] ], [ [[SUB:%.*]], %[[IF_ELSE]] ]
+; GFX-UNSAFE-FP-IEEE-NEXT: ret half [[COMMON_RET_OP]]
+; GFX-UNSAFE-FP-IEEE: [[IF_THEN]]:
+; GFX-UNSAFE-FP-IEEE-NEXT: [[MUL]] = fmul fast half [[TMP1]], [[TMP2]]
+; GFX-UNSAFE-FP-IEEE-NEXT: [[ADD:%.*]] = fadd fast half 0xH3C00, [[MUL]]
+; GFX-UNSAFE-FP-IEEE-NEXT: br label %[[COMMON_RET]]
+; GFX-UNSAFE-FP-IEEE: [[IF_ELSE]]:
+; GFX-UNSAFE-FP-IEEE-NEXT: [[MUL1:%.*]] = fmul fast half [[TMP1]], [[TMP2]]
+; GFX-UNSAFE-FP-IEEE-NEXT: [[SUB]] = fsub fast half [[MUL1]], [[TMP0]]
+; GFX-UNSAFE-FP-IEEE-NEXT: br label %[[COMMON_RET]]
+;
+; GFX-UNSAFE-FP-PRESERVE-LABEL: define half @_branch3(
+; GFX-UNSAFE-FP-PRESERVE-SAME: ptr dereferenceable(8) [[X:%.*]], ptr dereferenceable(8) [[Y:%.*]], ptr dereferenceable(8) [[A:%.*]]) #[[ATTR0]] {
+; GFX-UNSAFE-FP-PRESERVE-NEXT: [[ENTRY:.*:]]
+; GFX-UNSAFE-FP-PRESERVE-NEXT: [[TMP0:%.*]] = load half, ptr [[Y]], align 8
+; GFX-UNSAFE-FP-PRESERVE-NEXT: [[CMP:%.*]] = fcmp oeq half [[TMP0]], 0xH0000
+; GFX-UNSAFE-FP-PRESERVE-NEXT: [[TMP1:%.*]] = load half, ptr [[X]], align 8
+; GFX-UNSAFE-FP-PRESERVE-NEXT: [[TMP2:%.*]] = load half, ptr [[A]], align 8
+; GFX-UNSAFE-FP-PRESERVE-NEXT: [[MUL:%.*]] = fmul fast half [[TMP1]], [[TMP2]]
+; GFX-UNSAFE-FP-PRESERVE-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]]
+; GFX-UNSAFE-FP-PRESERVE: [[COMMON_RET:.*]]:
+; GFX-UNSAFE-FP-PRESERVE-NEXT: [[COMMON_RET_OP:%.*]] = phi half [ [[MUL]], %[[IF_THEN]] ], [ [[SUB:%.*]], %[[IF_ELSE]] ]
+; GFX-UNSAFE-FP-PRESERVE-NEXT: ret half [[COMMON_RET_OP]]
+; GFX-UNSAFE-FP-PRESERVE: [[IF_THEN]]:
+; GFX-UNSAFE-FP-PRESERVE-NEXT: [[ADD:%.*]] = fadd fast half 0xH3C00, [[MUL]]
+; GFX-UNSAFE-FP-PRESERVE-NEXT: br label %[[COMMON_RET]]
+; GFX-UNSAFE-FP-PRESERVE: [[IF_ELSE]]:
+; GFX-UNSAFE-FP-PRESERVE-NEXT: [[SUB]] = fsub fast half [[MUL]], [[TMP0]]
+; GFX-UNSAFE-FP-PRESERVE-NEXT: br label %[[COMMON_RET]]
+;
+entry:
+ %0 = load half, ptr %y, align 8
+ %cmp = fcmp oeq half %0, 0.000000e+00
+ %1 = load half, ptr %x, align 8
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then: ; preds = %entry
+ %2 = load half, ptr %a, align 8
+ %mul = fmul fast half %1, %2
+ %add = fadd fast half 1.000000e+00, %mul
+ ret half %mul
+
+if.else: ; preds = %entry
+ %3 = load half, ptr %a, align 8
+ %mul1 = fmul fast half %1, %3
+ %sub = fsub fast half %mul1, %0
+ ret half %sub
+}
+
More information about the llvm-commits
mailing list