[llvm] 86a480e - [AMDGPU] Add simplification/combines for llvm.amdgcn.fmul.legacy
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 23 01:31:15 PDT 2020
Author: Jay Foad
Date: 2020-10-23T09:31:00+01:00
New Revision: 86a480e9ce786bc55c1c9632d9a42c08846e4695
URL: https://github.com/llvm/llvm-project/commit/86a480e9ce786bc55c1c9632d9a42c08846e4695
DIFF: https://github.com/llvm/llvm-project/commit/86a480e9ce786bc55c1c9632d9a42c08846e4695.diff
LOG: [AMDGPU] Add simplification/combines for llvm.amdgcn.fmul.legacy
Differential Revision: https://reviews.llvm.org/D88955
Added:
llvm/test/Transforms/InstCombine/AMDGPU/fmul_legacy.ll
Modified:
llvm/include/llvm/IR/PatternMatch.h
llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h
index 987d9c4b99fc..562557bdd5c5 100644
--- a/llvm/include/llvm/IR/PatternMatch.h
+++ b/llvm/include/llvm/IR/PatternMatch.h
@@ -619,6 +619,18 @@ inline cstfp_pred_ty<is_finite> m_Finite() {
}
inline apf_pred_ty<is_finite> m_Finite(const APFloat *&V) { return V; }
+struct is_finitenonzero {
+ bool isValue(const APFloat &C) { return C.isFiniteNonZero(); }
+};
+/// Match a finite non-zero FP constant (i.e. not zero, infinity or NaN).
+/// For vectors, this includes constants with undefined elements.
+inline cstfp_pred_ty<is_finitenonzero> m_FiniteNonZero() {
+ return cstfp_pred_ty<is_finitenonzero>();
+}
+inline apf_pred_ty<is_finitenonzero> m_FiniteNonZero(const APFloat *&V) {
+ return V;
+}
+
struct is_any_zero_fp {
bool isValue(const APFloat &C) { return C.isZero(); }
};
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 4b2624350b74..ebc6391b2bc6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -823,6 +823,39 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
break;
}
+ case Intrinsic::amdgcn_fmul_legacy: {
+ Value *Op0 = II.getArgOperand(0);
+ Value *Op1 = II.getArgOperand(1);
+
+ // The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or
+ // infinity, gives +0.0.
+ // TODO: Move to InstSimplify?
+ if (match(Op0, PatternMatch::m_AnyZeroFP()) ||
+ match(Op1, PatternMatch::m_AnyZeroFP()))
+ return IC.replaceInstUsesWith(II, ConstantFP::getNullValue(II.getType()));
+
+ // If we can prove we don't have one of the special cases, i.e. a zero
+ // operand multiplied by NaN or infinity, we can use a normal fmul instead.
+ auto *TLI = &IC.getTargetLibraryInfo();
+ bool CanSimplifyToMul = false;
+ // TODO: Create and use isKnownFiniteNonZero instead of just matching
+ // constants here.
+ if (match(Op0, PatternMatch::m_FiniteNonZero()) ||
+ match(Op1, PatternMatch::m_FiniteNonZero())) {
+ // One operand is a finite non-zero constant (not zero, infinity or NaN).
+ CanSimplifyToMul = true;
+ } else if (isKnownNeverInfinity(Op0, TLI) && isKnownNeverNaN(Op0, TLI) &&
+ isKnownNeverInfinity(Op1, TLI) && isKnownNeverNaN(Op1, TLI)) {
+ // Neither operand is infinity or NaN, so a zero operand still gives zero.
+ CanSimplifyToMul = true;
+ }
+ if (CanSimplifyToMul) {
+ auto *FMul = IC.Builder.CreateFMulFMF(Op0, Op1, &II);
+ FMul->takeName(&II);
+ return IC.replaceInstUsesWith(II, FMul);
+ }
+ break;
+ }
default: {
if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
AMDGPU::getImageDimIntrinsicInfo(II.getIntrinsicID())) {
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/fmul_legacy.ll b/llvm/test/Transforms/InstCombine/AMDGPU/fmul_legacy.ll
new file mode 100644
index 000000000000..efb8294a8473
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/fmul_legacy.ll
@@ -0,0 +1,46 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -instcombine -S | FileCheck %s
+
+; Simplify to +0.0 because one operand is the constant +0.0.
+define float @test_zero(float %x) {
+; CHECK-LABEL: @test_zero(
+; CHECK-NEXT: ret float 0.000000e+00
+;
+ %call = call float @llvm.amdgcn.fmul.legacy(float %x, float 0.0)
+ ret float %call
+}
+
+; Simplify to +0.0 (not -0.0) even though the constant operand is -0.0.
+define float @test_negzero(float %y) {
+; CHECK-LABEL: @test_negzero(
+; CHECK-NEXT: ret float 0.000000e+00
+;
+ %call = call float @llvm.amdgcn.fmul.legacy(float -0.0, float %y)
+ ret float %call
+}
+
+; Combine to fmul because the constant operand (99.5) is finite and non-zero.
+define float @test_const(float %x) {
+; CHECK-LABEL: @test_const(
+; CHECK-NEXT: [[CALL:%.*]] = fmul float [[X:%.*]], 9.950000e+01
+; CHECK-NEXT: ret float [[CALL]]
+;
+ %call = call float @llvm.amdgcn.fmul.legacy(float %x, float 99.5)
+ ret float %call
+}
+
+; Combine to fmul because neither argument (both sitofp of an i32) can be infinity or NaN.
+define float @test_finite(i32 %x, i32 %y) {
+; CHECK-LABEL: @test_finite(
+; CHECK-NEXT: [[XF:%.*]] = sitofp i32 [[X:%.*]] to float
+; CHECK-NEXT: [[YF:%.*]] = sitofp i32 [[Y:%.*]] to float
+; CHECK-NEXT: [[CALL:%.*]] = fmul float [[XF]], [[YF]]
+; CHECK-NEXT: ret float [[CALL]]
+;
+ %xf = sitofp i32 %x to float
+ %yf = sitofp i32 %y to float
+ %call = call float @llvm.amdgcn.fmul.legacy(float %xf, float %yf)
+ ret float %call
+}
+
+declare float @llvm.amdgcn.fmul.legacy(float, float)
More information about the llvm-commits
mailing list