[llvm] 1417abe - [AMDGPU] Add new llvm.amdgcn.fma.legacy intrinsic
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 16 09:16:21 PDT 2020
Author: Jay Foad
Date: 2020-10-16T17:10:21+01:00
New Revision: 1417abe54c28854094230f6d3b743d5578f95dff
URL: https://github.com/llvm/llvm-project/commit/1417abe54c28854094230f6d3b743d5578f95dff
DIFF: https://github.com/llvm/llvm-project/commit/1417abe54c28854094230f6d3b743d5578f95dff.diff
LOG: [AMDGPU] Add new llvm.amdgcn.fma.legacy intrinsic
Differential Revision: https://reviews.llvm.org/D89558
Added:
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fma.legacy.ll
llvm/test/Transforms/InstSimplify/ConstProp/AMDGPU/fma_legacy.ll
Modified:
llvm/include/llvm/IR/IntrinsicsAMDGPU.td
llvm/lib/Analysis/ConstantFolding.cpp
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
llvm/lib/Target/AMDGPU/SIInstructions.td
llvm/lib/Target/AMDGPU/VOP3Instructions.td
Removed:
################################################################################
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index ed96c0896d74..304377ce28ab 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -255,7 +255,17 @@ def int_amdgcn_log_clamp : Intrinsic<
def int_amdgcn_fmul_legacy : GCCBuiltin<"__builtin_amdgcn_fmul_legacy">,
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, IntrSpeculatable, IntrWillReturn]
+ [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative]
+>;
+
+// Fused single-precision multiply-add with legacy behaviour for the multiply,
+// which is that +/- 0.0 * anything (even NaN or infinity) is +0.0. This is
+// intended for use on subtargets that have the v_fma_legacy_f32 and/or
+// v_fmac_legacy_f32 instructions. (Note that v_fma_legacy_f16 is unrelated and
+// has a completely different kind of legacy behaviour.)
+def int_amdgcn_fma_legacy :
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative]
>;
def int_amdgcn_rcp : Intrinsic<
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index 9ae67d074c5c..bf445375962d 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -1504,6 +1504,7 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
case Intrinsic::amdgcn_cubesc:
case Intrinsic::amdgcn_cubetc:
case Intrinsic::amdgcn_fmul_legacy:
+ case Intrinsic::amdgcn_fma_legacy:
case Intrinsic::amdgcn_fract:
case Intrinsic::amdgcn_ldexp:
case Intrinsic::amdgcn_sin:
@@ -2371,8 +2372,8 @@ static Constant *ConstantFoldScalarCall2(StringRef Name,
if (IntrinsicID == Intrinsic::amdgcn_fmul_legacy) {
const APFloat &C1 = Op1->getValueAPF();
const APFloat &C2 = Op2->getValueAPF();
- // The legacy behaviour is that multiplying zero by anything, even NaN
- // or infinity, gives +0.0.
+ // The legacy behaviour is that multiplying +/- 0.0 by anything, even
+ // NaN or infinity, gives +0.0.
if (C1.isZero() || C2.isZero())
return ConstantFP::getNullValue(Ty);
return ConstantFP::get(Ty->getContext(), C1 * C2);
@@ -2706,6 +2707,19 @@ static Constant *ConstantFoldScalarCall3(StringRef Name,
if (const auto *Op3 = dyn_cast<ConstantFP>(Operands[2])) {
switch (IntrinsicID) {
default: break;
+ case Intrinsic::amdgcn_fma_legacy: {
+ const APFloat &C1 = Op1->getValueAPF();
+ const APFloat &C2 = Op2->getValueAPF();
+ // The legacy behaviour is that multiplying +/- 0.0 by anything, even
+ // NaN or infinity, gives +0.0.
+ if (C1.isZero() || C2.isZero()) {
+ const APFloat &C3 = Op3->getValueAPF();
+ // It's tempting to just return C3 here, but that would give the
+ // wrong result if C3 was -0.0.
+ return ConstantFP::get(Ty->getContext(), APFloat(0.0f) + C3);
+ }
+ LLVM_FALLTHROUGH;
+ }
case Intrinsic::fma:
case Intrinsic::fmuladd: {
APFloat V = Op1->getValueAPF();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 5185d6d2f55a..1ffded0a0be3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -599,6 +599,7 @@ static bool fnegFoldsIntoOp(unsigned Opc) {
case AMDGPUISD::FMIN_LEGACY:
case AMDGPUISD::FMAX_LEGACY:
case AMDGPUISD::FMED3:
+ // TODO: handle llvm.amdgcn.fma.legacy
return true;
default:
return false;
@@ -3723,6 +3724,7 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
}
case ISD::FMA:
case ISD::FMAD: {
+ // TODO: handle llvm.amdgcn.fma.legacy
if (!mayIgnoreSignedZero(N0))
return SDValue();
@@ -4713,6 +4715,12 @@ bool AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
case Intrinsic::amdgcn_fdot2:
// TODO: Refine on operand
return SNaN;
+ case Intrinsic::amdgcn_fma_legacy:
+ if (SNaN)
+ return true;
+ return DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1) &&
+ DAG.isKnownNeverNaN(Op.getOperand(2), SNaN, Depth + 1) &&
+ DAG.isKnownNeverNaN(Op.getOperand(3), SNaN, Depth + 1);
default:
return false;
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 21eaa6b21327..d79828de1d76 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -4020,6 +4020,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_rsq_legacy:
case Intrinsic::amdgcn_rsq_clamp:
case Intrinsic::amdgcn_fmul_legacy:
+ case Intrinsic::amdgcn_fma_legacy:
case Intrinsic::amdgcn_ldexp:
case Intrinsic::amdgcn_frexp_mant:
case Intrinsic::amdgcn_frexp_exp:
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 621c339ff105..cb3fcf88d169 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -895,6 +895,17 @@ def : GCNPat <
SRCMODS.NONE, $src2, DSTCLAMP.NONE, DSTOMOD.NONE)
>;
+// Don't allow source modifiers. If there are any source modifiers then it's
+// better to select fma instead of fmac.
+let SubtargetPredicate = HasNoMadMacF32Insts in
+def : GCNPat <
+ (f32 (int_amdgcn_fma_legacy (VOP3NoMods f32:$src0),
+ (VOP3NoMods f32:$src1),
+ (VOP3NoMods f32:$src2))),
+ (V_FMAC_LEGACY_F32_e64 SRCMODS.NONE, $src0, SRCMODS.NONE, $src1,
+ SRCMODS.NONE, $src2, DSTCLAMP.NONE, DSTOMOD.NONE)
+>;
+
let SubtargetPredicate = Has16BitInsts in {
def : FMADPat <f16, V_MAC_F16_e64, fmad>;
def : FMADPat <f16, V_MAC_F16_e64, AMDGPUfmad_ftz>;
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index c4546f989c70..8fea9403cc42 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -298,7 +298,9 @@ def V_MAD_F32 : VOP3Inst <"v_mad_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, fmad>;
} // End SubtargetPredicate = HasMadMacInsts
let SubtargetPredicate = HasNoMadMacF32Insts in
-def V_FMA_LEGACY_F32 : VOP3Inst <"v_fma_legacy_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
+def V_FMA_LEGACY_F32 : VOP3Inst <"v_fma_legacy_f32",
+ VOP3_Profile<VOP_F32_F32_F32_F32>,
+ int_amdgcn_fma_legacy>;
}
def V_MAD_I32_I24 : VOP3Inst <"v_mad_i32_i24", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fma.legacy.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fma.legacy.ll
new file mode 100644
index 000000000000..28e2c7a9db69
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fma.legacy.ll
@@ -0,0 +1,59 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefix=GCN %s
+
+define float @v_fma(float %a, float %b, float %c) {
+; GCN-LABEL: v_fma:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: s_waitcnt_vscnt null, 0x0
+; GCN-NEXT: v_fmac_legacy_f32_e64 v2, v0, v1
+; GCN-NEXT: ; implicit-def: $vcc_hi
+; GCN-NEXT: v_mov_b32_e32 v0, v2
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ %fma = call float @llvm.amdgcn.fma.legacy(float %a, float %b, float %c)
+ ret float %fma
+}
+
+define float @v_fabs_fma(float %a, float %b, float %c) {
+; GCN-LABEL: v_fabs_fma:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: s_waitcnt_vscnt null, 0x0
+; GCN-NEXT: v_fma_legacy_f32 v0, |v0|, v1, v2
+; GCN-NEXT: ; implicit-def: $vcc_hi
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ %fabs.a = call float @llvm.fabs.f32(float %a)
+ %fma = call float @llvm.amdgcn.fma.legacy(float %fabs.a, float %b, float %c)
+ ret float %fma
+}
+
+define float @v_fneg_fabs_fma(float %a, float %b, float %c) {
+; GCN-LABEL: v_fneg_fabs_fma:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: s_waitcnt_vscnt null, 0x0
+; GCN-NEXT: v_fma_legacy_f32 v0, v0, -|v1|, v2
+; GCN-NEXT: ; implicit-def: $vcc_hi
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ %fabs.b = call float @llvm.fabs.f32(float %b)
+ %neg.fabs.b = fneg float %fabs.b
+ %fma = call float @llvm.amdgcn.fma.legacy(float %a, float %neg.fabs.b, float %c)
+ ret float %fma
+}
+
+define float @v_fneg_fma(float %a, float %b, float %c) {
+; GCN-LABEL: v_fneg_fma:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: s_waitcnt_vscnt null, 0x0
+; GCN-NEXT: v_fma_legacy_f32 v0, v0, v1, -v2
+; GCN-NEXT: ; implicit-def: $vcc_hi
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ %neg.c = fneg float %c
+ %fma = call float @llvm.amdgcn.fma.legacy(float %a, float %b, float %neg.c)
+ ret float %fma
+}
+
+declare float @llvm.amdgcn.fma.legacy(float, float, float)
+declare float @llvm.fabs.f32(float)
diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/AMDGPU/fma_legacy.ll b/llvm/test/Transforms/InstSimplify/ConstProp/AMDGPU/fma_legacy.ll
new file mode 100644
index 000000000000..6e855a668b4a
--- /dev/null
+++ b/llvm/test/Transforms/InstSimplify/ConstProp/AMDGPU/fma_legacy.ll
@@ -0,0 +1,44 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instsimplify -S | FileCheck %s
+
+declare float @llvm.amdgcn.fma.legacy(float, float, float)
+
+define void @test(float* %p) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT: store volatile float 1.000000e+01, float* [[P:%.*]], align 4
+; CHECK-NEXT: store volatile float 4.000000e+00, float* [[P]], align 4
+; CHECK-NEXT: store volatile float 4.000000e+00, float* [[P]], align 4
+; CHECK-NEXT: store volatile float 0.000000e+00, float* [[P]], align 4
+; CHECK-NEXT: store volatile float 0.000000e+00, float* [[P]], align 4
+; CHECK-NEXT: store volatile float 0.000000e+00, float* [[P]], align 4
+; CHECK-NEXT: store volatile float 0.000000e+00, float* [[P]], align 4
+; CHECK-NEXT: store volatile float 4.000000e+00, float* [[P]], align 4
+; CHECK-NEXT: store volatile float 4.000000e+00, float* [[P]], align 4
+; CHECK-NEXT: store volatile float 4.000000e+00, float* [[P]], align 4
+; CHECK-NEXT: store volatile float 4.000000e+00, float* [[P]], align 4
+; CHECK-NEXT: ret void
+;
+ %a = call float @llvm.amdgcn.fma.legacy(float +2.0, float +3.0, float +4.0)
+ store volatile float %a, float* %p
+ %b = call float @llvm.amdgcn.fma.legacy(float +2.0, float +0.0, float +4.0)
+ store volatile float %b, float* %p
+ %c = call float @llvm.amdgcn.fma.legacy(float +2.0, float -0.0, float +4.0)
+ store volatile float %c, float* %p
+ %d = call float @llvm.amdgcn.fma.legacy(float +0.0, float +0.0, float -0.0)
+ store volatile float %d, float* %p
+ %e = call float @llvm.amdgcn.fma.legacy(float +0.0, float -0.0, float -0.0)
+ store volatile float %e, float* %p
+ %f = call float @llvm.amdgcn.fma.legacy(float -0.0, float +0.0, float -0.0)
+ store volatile float %f, float* %p
+ %g = call float @llvm.amdgcn.fma.legacy(float -0.0, float -0.0, float -0.0)
+ store volatile float %g, float* %p
+ %h = call float @llvm.amdgcn.fma.legacy(float +0.0, float 0x7ff0000000000000, float +4.0) ; +inf
+ store volatile float %h, float* %p
+ %i = call float @llvm.amdgcn.fma.legacy(float 0xfff0000000000000, float +0.0, float +4.0) ; -inf
+ store volatile float %i, float* %p
+ %j = call float @llvm.amdgcn.fma.legacy(float 0x7ff0001000000000, float -0.0, float +4.0) ; +nan
+ store volatile float %j, float* %p
+ %k = call float @llvm.amdgcn.fma.legacy(float -0.0, float 0xfff0000100000000, float +4.0) ; -nan
+ store volatile float %k, float* %p
+ ret void
+}
More information about the llvm-commits
mailing list