[llvm] 1bce1be - AMDGPU: Reduce number of calls to computeKnownFPClass and pass all arguments
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 26 10:02:25 PDT 2023
Author: Matt Arsenault
Date: 2023-04-26T13:02:17-04:00
New Revision: 1bce1beac4b7980ed01c07d6a6d789f8ec55adac
URL: https://github.com/llvm/llvm-project/commit/1bce1beac4b7980ed01c07d6a6d789f8ec55adac
DIFF: https://github.com/llvm/llvm-project/commit/1bce1beac4b7980ed01c07d6a6d789f8ec55adac.diff
LOG: AMDGPU: Reduce number of calls to computeKnownFPClass and pass all arguments
Makes assumes (llvm.assume) work for this case.
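For illustration, here is the before/after query pattern this commit changes, lifted out of the diff below (Op0, Op1, DL, AC, I, DT and ORE stand for the values the combiner already has in scope; treat this as a sketch, not new API):

// Before: four ValueTracking walks per combine, with only the
// TargetLibraryInfo available, so dominating llvm.assume calls are
// invisible to the query.
bool OldOK = isKnownNeverInfinity(Op0, TLI) && isKnownNeverNaN(Op0, TLI) &&
             isKnownNeverInfinity(Op1, TLI) && isKnownNeverNaN(Op1, TLI);

// After: one computeKnownFPClass walk per operand via the new
// isKnownNeverInfOrNaN helper, passing the assumption cache, context
// instruction and dominator tree so assume-derived facts apply.
bool NewOK =
    isKnownNeverInfOrNaN(Op0, DL, TLI, /*Depth=*/0, &AC, &I, &DT, &ORE) &&
    isKnownNeverInfOrNaN(Op1, DL, TLI, /*Depth=*/0, &AC, &I, &DT, &ORE);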
Added:
Modified:
llvm/include/llvm/Analysis/ValueTracking.h
llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
llvm/test/Transforms/InstCombine/AMDGPU/fma_legacy.ll
llvm/test/Transforms/InstCombine/AMDGPU/fmul_legacy.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h
index 8bd38a317b797..4e95791aaa14e 100644
--- a/llvm/include/llvm/Analysis/ValueTracking.h
+++ b/llvm/include/llvm/Analysis/ValueTracking.h
@@ -420,6 +420,17 @@ bool CannotBeOrderedLessThanZero(const Value *V, const TargetLibraryInfo *TLI);
bool isKnownNeverInfinity(const Value *V, const TargetLibraryInfo *TLI,
unsigned Depth = 0);
+/// Return true if the floating-point value can never contain a NaN or infinity.
+inline bool isKnownNeverInfOrNaN(
+ const Value *V, const DataLayout &DL, const TargetLibraryInfo *TLI,
+ unsigned Depth = 0, AssumptionCache *AC = nullptr,
+ const Instruction *CtxI = nullptr, const DominatorTree *DT = nullptr,
+ OptimizationRemarkEmitter *ORE = nullptr, bool UseInstrInfo = true) {
+ KnownFPClass Known = computeKnownFPClass(V, DL, fcInf | fcNan, Depth, TLI, AC,
+ CtxI, DT, ORE, UseInstrInfo);
+ return Known.isKnownNeverNaN() && Known.isKnownNeverInfinity();
+}
+
/// Return true if the floating-point scalar value is not a NaN or if the
/// floating-point vector value has no NaN elements. Return false if a value
/// could ever be NaN.
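Everything after TLI in the new helper has a default, so callers without context can use a short form while combiners pass the full argument set. A minimal sketch under that assumption (the wrapper name is hypothetical, not part of this commit):

#include "llvm/Analysis/ValueTracking.h"
using namespace llvm;

// Hypothetical wrapper showing the context-free call form. With the
// defaults (Depth = 0, no AssumptionCache, context instruction, or
// dominator tree) the query cannot see llvm.assume facts, matching the
// power of the old paired isKnownNeverInfinity/isKnownNeverNaN calls.
static bool neverInfOrNaNNoContext(const Value *V, const DataLayout &DL,
                                   const TargetLibraryInfo *TLI) {
  return isKnownNeverInfOrNaN(V, DL, TLI);
}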
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 44ad131bd7eff..48a6fde657094 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -328,7 +328,8 @@ simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
});
}
-bool GCNTTIImpl::canSimplifyLegacyMulToMul(const Value *Op0, const Value *Op1,
+bool GCNTTIImpl::canSimplifyLegacyMulToMul(const Instruction &I,
+ const Value *Op0, const Value *Op1,
InstCombiner &IC) const {
// The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or
// infinity, gives +0.0. If we can prove we don't have one of the special
@@ -340,9 +341,14 @@ bool GCNTTIImpl::canSimplifyLegacyMulToMul(const Value *Op0, const Value *Op1,
// One operand is not zero or infinity or NaN.
return true;
}
+
auto *TLI = &IC.getTargetLibraryInfo();
- if (isKnownNeverInfinity(Op0, TLI) && isKnownNeverNaN(Op0, TLI) &&
- isKnownNeverInfinity(Op1, TLI) && isKnownNeverNaN(Op1, TLI)) {
+ if (isKnownNeverInfOrNaN(Op0, IC.getDataLayout(), TLI, 0,
+ &IC.getAssumptionCache(), &I, &IC.getDominatorTree(),
+ &IC.getOptimizationRemarkEmitter()) &&
+ isKnownNeverInfOrNaN(Op1, IC.getDataLayout(), TLI, 0,
+ &IC.getAssumptionCache(), &I, &IC.getDominatorTree(),
+ &IC.getOptimizationRemarkEmitter())) {
// Neither operand is infinity or NaN.
return true;
}
@@ -1005,7 +1011,7 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
// If we can prove we don't have one of the special cases then we can use a
// normal fmul instruction instead.
- if (canSimplifyLegacyMulToMul(Op0, Op1, IC)) {
+ if (canSimplifyLegacyMulToMul(II, Op0, Op1, IC)) {
auto *FMul = IC.Builder.CreateFMulFMF(Op0, Op1, &II);
FMul->takeName(&II);
return IC.replaceInstUsesWith(II, FMul);
@@ -1032,7 +1038,7 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
// If we can prove we don't have one of the special cases then we can use a
// normal fma instead.
- if (canSimplifyLegacyMulToMul(Op0, Op1, IC)) {
+ if (canSimplifyLegacyMulToMul(II, Op0, Op1, IC)) {
II.setCalledOperand(Intrinsic::getDeclaration(
II.getModule(), Intrinsic::fma, II.getType()));
        return &II;
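Since the same argument bundle is spelled out twice in the hunk above, a small local helper could tidy the call sites. A hypothetical sketch (this wrapper is not part of the commit), built only from InstCombiner accessors the patch itself already uses:

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
using namespace llvm;

// Hypothetical convenience wrapper (not in the commit): bundles the
// analyses InstCombiner provides so each operand query fits on one line.
static bool knownNeverInfOrNaNAt(const Value *V, const Instruction &CtxI,
                                 InstCombiner &IC) {
  return isKnownNeverInfOrNaN(V, IC.getDataLayout(),
                              &IC.getTargetLibraryInfo(), /*Depth=*/0,
                              &IC.getAssumptionCache(), &CtxI,
                              &IC.getDominatorTree(),
                              &IC.getOptimizationRemarkEmitter());
}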
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index 972ea8cf52ea0..cdd76861335fb 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -209,8 +209,8 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
Value *NewV) const;
- bool canSimplifyLegacyMulToMul(const Value *Op0, const Value *Op1,
- InstCombiner &IC) const;
+ bool canSimplifyLegacyMulToMul(const Instruction &I, const Value *Op0,
+ const Value *Op1, InstCombiner &IC) const;
std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
IntrinsicInst &II) const;
std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/fma_legacy.ll b/llvm/test/Transforms/InstCombine/AMDGPU/fma_legacy.ll
index 89979bcedf814..72bffe62fbb14 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/fma_legacy.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/fma_legacy.ll
@@ -83,4 +83,28 @@ define float @test_finite(i32 %x, i32 %y, float %z) {
ret float %call
}
+; Combine to fma because neither argument can be infinity or NaN based on assumptions
+define float @test_finite_assumed(float %x, float %y, float %z) {
+; CHECK-LABEL: @test_finite_assumed(
+; CHECK-NEXT: [[FABS_X:%.*]] = call float @llvm.fabs.f32(float [[X:%.*]])
+; CHECK-NEXT: [[IS_FINITE_X:%.*]] = fcmp one float [[FABS_X]], 0x7FF0000000000000
+; CHECK-NEXT: [[FABS_Y:%.*]] = call float @llvm.fabs.f32(float [[Y:%.*]])
+; CHECK-NEXT: [[IS_FINITE_Y:%.*]] = fcmp one float [[FABS_Y]], 0x7FF0000000000000
+; CHECK-NEXT: call void @llvm.assume(i1 [[IS_FINITE_X]])
+; CHECK-NEXT: call void @llvm.assume(i1 [[IS_FINITE_Y]])
+; CHECK-NEXT: [[CALL:%.*]] = call float @llvm.fma.f32(float [[X]], float [[Y]], float [[Z:%.*]])
+; CHECK-NEXT: ret float [[CALL]]
+;
+ %fabs.x = call float @llvm.fabs.f32(float %x)
+ %is.finite.x = fcmp one float %fabs.x, 0x7FF0000000000000
+ %fabs.y = call float @llvm.fabs.f32(float %y)
+ %is.finite.y = fcmp one float %fabs.y, 0x7FF0000000000000
+ call void @llvm.assume(i1 %is.finite.x)
+ call void @llvm.assume(i1 %is.finite.y)
+ %call = call float @llvm.amdgcn.fma.legacy(float %x, float %y, float %z)
+ ret float %call
+}
+
declare float @llvm.amdgcn.fma.legacy(float, float, float)
+declare float @llvm.fabs.f32(float)
+declare void @llvm.assume(i1 noundef)
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/fmul_legacy.ll b/llvm/test/Transforms/InstCombine/AMDGPU/fmul_legacy.ll
index 439bedbe60dd8..d58470d6b12ad 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/fmul_legacy.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/fmul_legacy.ll
@@ -53,4 +53,28 @@ define float @test_finite(i32 %x, i32 %y) {
ret float %call
}
+; Combine to fmul because neither argument can be infinity or NaN based on assumptions
+define float @test_finite_assumed(float %x, float %y) {
+; CHECK-LABEL: @test_finite_assumed(
+; CHECK-NEXT: [[FABS_X:%.*]] = call float @llvm.fabs.f32(float [[X:%.*]])
+; CHECK-NEXT: [[IS_FINITE_X:%.*]] = fcmp one float [[FABS_X]], 0x7FF0000000000000
+; CHECK-NEXT: [[FABS_Y:%.*]] = call float @llvm.fabs.f32(float [[Y:%.*]])
+; CHECK-NEXT: [[IS_FINITE_Y:%.*]] = fcmp one float [[FABS_Y]], 0x7FF0000000000000
+; CHECK-NEXT: call void @llvm.assume(i1 [[IS_FINITE_X]])
+; CHECK-NEXT: call void @llvm.assume(i1 [[IS_FINITE_Y]])
+; CHECK-NEXT: [[CALL:%.*]] = fmul float [[X]], [[Y]]
+; CHECK-NEXT: ret float [[CALL]]
+;
+ %fabs.x = call float @llvm.fabs.f32(float %x)
+ %is.finite.x = fcmp one float %fabs.x, 0x7FF0000000000000
+ %fabs.y = call float @llvm.fabs.f32(float %y)
+ %is.finite.y = fcmp one float %fabs.y, 0x7FF0000000000000
+ call void @llvm.assume(i1 %is.finite.x)
+ call void @llvm.assume(i1 %is.finite.y)
+ %call = call float @llvm.amdgcn.fmul.legacy(float %x, float %y)
+ ret float %call
+}
+
declare float @llvm.amdgcn.fmul.legacy(float, float)
+declare float @llvm.fabs.f32(float)
+declare void @llvm.assume(i1 noundef)