[llvm] 958130d - [AMDGPU] Add simplification/combines for llvm.amdgcn.fma.legacy
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 23 08:31:12 PDT 2020
Author: Jay Foad
Date: 2020-10-23T16:16:13+01:00
New Revision: 958130dfda276304372b54010adfc48f05b13c2c
URL: https://github.com/llvm/llvm-project/commit/958130dfda276304372b54010adfc48f05b13c2c
DIFF: https://github.com/llvm/llvm-project/commit/958130dfda276304372b54010adfc48f05b13c2c.diff
LOG: [AMDGPU] Add simplification/combines for llvm.amdgcn.fma.legacy
This follows on from D89558 which added the new intrinsic and D88955
which added similar combines for llvm.amdgcn.fmul.legacy.
Differential Revision: https://reviews.llvm.org/D90028
Added:
llvm/test/Transforms/InstCombine/AMDGPU/fma_legacy.ll
Modified:
llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
llvm/test/Transforms/InstCombine/AMDGPU/fmul_legacy.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index ebc6391b2bc6..59c852b01d8b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -163,6 +163,27 @@ simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
return IC.replaceInstUsesWith(II, NewCall);
}
+bool GCNTTIImpl::canSimplifyLegacyMulToMul(const Value *Op0, const Value *Op1,
+ InstCombiner &IC) const {
+ // Returns true if a legacy multiply of Op0 and Op1 can become a normal one.
+ // The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or
+ // infinity, gives +0.0, so we must rule out those special cases first.
+ // TODO: Create and use isKnownFiniteNonZero instead of just matching
+ // constants here.
+ if (match(Op0, PatternMatch::m_FiniteNonZero()) ||
+ match(Op1, PatternMatch::m_FiniteNonZero())) {
+ // At least one operand is a constant that is not zero, infinity or NaN.
+ return true;
+ }
+ auto *TLI = &IC.getTargetLibraryInfo();
+ if (isKnownNeverInfinity(Op0, TLI) && isKnownNeverNaN(Op0, TLI) &&
+ isKnownNeverInfinity(Op1, TLI) && isKnownNeverNaN(Op1, TLI)) {
+ // Both operands are known never to be infinity or NaN.
+ return true;
+ }
+ return false; // Nothing could be proven, so conservatively refuse.
+}
+
Optional<Instruction *>
GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
Intrinsic::ID IID = II.getIntrinsicID();
@@ -836,26 +857,40 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
// If we can prove we don't have one of the special cases then we can use a
// normal fmul instruction instead.
- auto *TLI = &IC.getTargetLibraryInfo();
- bool CanSimplifyToMul = false;
- // TODO: Create and use isKnownFiniteNonZero instead of just matching
- // constants here.
- if (match(Op0, PatternMatch::m_FiniteNonZero()) ||
- match(Op1, PatternMatch::m_FiniteNonZero())) {
- // One operand is not zero or infinity or NaN.
- CanSimplifyToMul = true;
- } else if (isKnownNeverInfinity(Op0, TLI) && isKnownNeverNaN(Op0, TLI) &&
- isKnownNeverInfinity(Op1, TLI) && isKnownNeverNaN(Op1, TLI)) {
- // Neither operand is infinity or NaN.
- CanSimplifyToMul = true;
- }
- if (CanSimplifyToMul) {
+ if (canSimplifyLegacyMulToMul(Op0, Op1, IC)) {
auto *FMul = IC.Builder.CreateFMulFMF(Op0, Op1, &II);
FMul->takeName(&II);
return IC.replaceInstUsesWith(II, FMul);
}
break;
}
+ case Intrinsic::amdgcn_fma_legacy: {
+ Value *Op0 = II.getArgOperand(0);
+ Value *Op1 = II.getArgOperand(1);
+ Value *Op2 = II.getArgOperand(2);
+
+ // The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or
+ // infinity, gives +0.0.
+ // TODO: Move to InstSimplify?
+ if (match(Op0, PatternMatch::m_AnyZeroFP()) ||
+ match(Op1, PatternMatch::m_AnyZeroFP())) {
+ // It's tempting to just return Op2 here, but that would give the wrong
+ // result if Op2 was -0.0.
+ auto *Zero = ConstantFP::getNullValue(II.getType());
+ auto *FAdd = IC.Builder.CreateFAddFMF(Zero, Op2, &II);
+ FAdd->takeName(&II);
+ return IC.replaceInstUsesWith(II, FAdd);
+ }
+
+ // If we can prove we don't have one of the special cases then we can use a
+ // normal fma instead.
+ if (canSimplifyLegacyMulToMul(Op0, Op1, IC)) {
+ II.setCalledOperand(Intrinsic::getDeclaration(
+ II.getModule(), Intrinsic::fma, II.getType()));
+ return &II;
+ }
+ break;
+ }
default: {
if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
AMDGPU::getImageDimIntrinsicInfo(II.getIntrinsicID())) {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index c4d9f9e6f854..29e30b6ef93d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -227,6 +227,8 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
Value *NewV) const;
+ bool canSimplifyLegacyMulToMul(const Value *Op0, const Value *Op1,
+ InstCombiner &IC) const;
Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
IntrinsicInst &II) const;
Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/fma_legacy.ll b/llvm/test/Transforms/InstCombine/AMDGPU/fma_legacy.ll
new file mode 100644
index 000000000000..2a740da875ea
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/fma_legacy.ll
@@ -0,0 +1,86 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -instcombine -S | FileCheck %s
+
+; x * 0.0 is +0.0 under the legacy rules, so simplify to +0.0 + z.
+define float @test_zero(float %x, float %z) {
+; CHECK-LABEL: @test_zero(
+; CHECK-NEXT: [[CALL:%.*]] = fadd float [[Z:%.*]], 0.000000e+00
+; CHECK-NEXT: ret float [[CALL]]
+;
+ %call = call float @llvm.amdgcn.fma.legacy(float %x, float 0.0, float %z)
+ ret float %call
+}
+
+; Simplify to +0.0 + z, preserving fmf.
+define float @test_zero_fmf(float %x, float %z) {
+; CHECK-LABEL: @test_zero_fmf(
+; CHECK-NEXT: [[CALL:%.*]] = fadd contract float [[Z:%.*]], 0.000000e+00
+; CHECK-NEXT: ret float [[CALL]]
+;
+ %call = call contract float @llvm.amdgcn.fma.legacy(float %x, float 0.0, float %z)
+ ret float %call
+}
+
+; Simplify to z: with nsz the sign of zero is ignored, so +0.0 + z folds to z.
+define float @test_zero_nsz(float %x, float %z) {
+; CHECK-LABEL: @test_zero_nsz(
+; CHECK-NEXT: ret float [[Z:%.*]]
+;
+ %call = call nsz float @llvm.amdgcn.fma.legacy(float %x, float 0.0, float %z)
+ ret float %call
+}
+
+; -0.0 * y is also +0.0 under the legacy rules, so simplify to +0.0 + z.
+define float @test_negzero(float %y, float %z) {
+; CHECK-LABEL: @test_negzero(
+; CHECK-NEXT: [[CALL:%.*]] = fadd float [[Z:%.*]], 0.000000e+00
+; CHECK-NEXT: ret float [[CALL]]
+;
+ %call = call float @llvm.amdgcn.fma.legacy(float -0.0, float %y, float %z)
+ ret float %call
+}
+
+; Simplify to z: the product is +0.0 and nsz lets +0.0 + z fold to z.
+define float @test_negzero_nsz(float %y, float %z) {
+; CHECK-LABEL: @test_negzero_nsz(
+; CHECK-NEXT: ret float [[Z:%.*]]
+;
+ %call = call nsz float @llvm.amdgcn.fma.legacy(float -0.0, float %y, float %z)
+ ret float %call
+}
+
+; Combine to fma because the constant is finite and non-zero.
+define float @test_const(float %x, float %z) {
+; CHECK-LABEL: @test_const(
+; CHECK-NEXT: [[CALL:%.*]] = call float @llvm.fma.f32(float [[X:%.*]], float 9.950000e+01, float [[Z:%.*]])
+; CHECK-NEXT: ret float [[CALL]]
+;
+ %call = call float @llvm.amdgcn.fma.legacy(float %x, float 99.5, float %z)
+ ret float %call
+}
+
+; Combine to fma because the constant is finite and non-zero, preserving fmf.
+define float @test_const_fmf(float %x, float %z) {
+; CHECK-LABEL: @test_const_fmf(
+; CHECK-NEXT: [[CALL:%.*]] = call contract float @llvm.fma.f32(float [[X:%.*]], float 9.950000e+01, float [[Z:%.*]])
+; CHECK-NEXT: ret float [[CALL]]
+;
+ %call = call contract float @llvm.amdgcn.fma.legacy(float %x, float 99.5, float %z)
+ ret float %call
+}
+
+; Combine to fma because neither argument can be infinity or NaN.
+define float @test_finite(i32 %x, i32 %y, float %z) {
+; CHECK-LABEL: @test_finite(
+; CHECK-NEXT: [[XF:%.*]] = sitofp i32 [[X:%.*]] to float
+; CHECK-NEXT: [[YF:%.*]] = sitofp i32 [[Y:%.*]] to float
+; CHECK-NEXT: [[CALL:%.*]] = call float @llvm.fma.f32(float [[XF]], float [[YF]], float [[Z:%.*]])
+; CHECK-NEXT: ret float [[CALL]]
+;
+ %xf = sitofp i32 %x to float
+ %yf = sitofp i32 %y to float
+ %call = call float @llvm.amdgcn.fma.legacy(float %xf, float %yf, float %z)
+ ret float %call
+}
+
+declare float @llvm.amdgcn.fma.legacy(float, float, float)
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/fmul_legacy.ll b/llvm/test/Transforms/InstCombine/AMDGPU/fmul_legacy.ll
index efb8294a8473..56839dbb7f29 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/fmul_legacy.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/fmul_legacy.ll
@@ -29,6 +29,16 @@ define float @test_const(float %x) {
ret float %call
}
+; Combine to fmul because the constant is finite and non-zero, preserving fmf.
+define float @test_const_fmf(float %x) {
+; CHECK-LABEL: @test_const_fmf(
+; CHECK-NEXT: [[CALL:%.*]] = fmul contract float [[X:%.*]], 9.950000e+01
+; CHECK-NEXT: ret float [[CALL]]
+;
+ %call = call contract float @llvm.amdgcn.fmul.legacy(float %x, float 99.5)
+ ret float %call
+}
+
; Combine to fmul because neither argument can be infinity or NaN.
define float @test_finite(i32 %x, i32 %y) {
; CHECK-LABEL: @test_finite(
More information about the llvm-commits
mailing list