[llvm] 1c64b5d - [ConstantFolding] Fold constrained arithmetic intrinsics

Serge Pavlov via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 23 00:40:31 PDT 2021


Author: Serge Pavlov
Date: 2021-07-23T14:39:51+07:00
New Revision: 1c64b5dc5ea8c20a7f2ae436f31030bde0c99db3

URL: https://github.com/llvm/llvm-project/commit/1c64b5dc5ea8c20a7f2ae436f31030bde0c99db3
DIFF: https://github.com/llvm/llvm-project/commit/1c64b5dc5ea8c20a7f2ae436f31030bde0c99db3.diff

LOG: [ConstantFolding] Fold constrained arithmetic intrinsics

Constant fold the constrained variants of fadd, fsub, fmul, fdiv, frem,
fma and fmuladd.

The change also lays the groundwork for removing unused constrained
intrinsics. They are declared as accessing memory to model their
interaction with the floating-point environment, so they were never
removed, as they appeared to have side effects. Now constrained
intrinsics whose exception behavior is "fpexcept.ignore" are removed if
they have no uses. Intrinsics with any other exception behavior can be
removed only if it is known that they do not raise floating-point
exceptions; when constant folding establishes this, the attributes of
such an intrinsic are changed so that it is no longer treated as
accessing memory.
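
For illustration, a minimal IR sketch adapted from the tests added in
this commit (attribute #0 is strictfp, as in the test file):

  ; Folds to 3.0; with "fpexcept.ignore" an unused call is also now
  ; considered trivially dead and can be deleted.
  %r1 = call double @llvm.experimental.constrained.fadd.f64(double 1.0, double 2.0, metadata !"round.tonearest", metadata !"fpexcept.ignore") #0

  ; Also folds to 3.0, because the evaluation raises no exception; in
  ; addition the call is marked as not accessing memory, so an unused
  ; call becomes removable despite "fpexcept.strict".
  %r2 = call double @llvm.experimental.constrained.fadd.f64(double 1.0, double 2.0, metadata !"round.tonearest", metadata !"fpexcept.strict") #0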

Differential Revision: https://reviews.llvm.org/D102673

Added: 
    

Modified: 
    llvm/lib/Analysis/ConstantFolding.cpp
    llvm/lib/Transforms/Utils/Local.cpp
    llvm/test/Transforms/InstSimplify/constfold-constrained.ll
    llvm/test/Transforms/InstSimplify/fdiv-strictfp.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index af25dabb0e17c..b28a0d6c78cd2 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -1593,6 +1593,13 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
   case Intrinsic::rint:
   // Constrained intrinsics can be folded if FP environment is known
   // to compiler.
+  case Intrinsic::experimental_constrained_fma:
+  case Intrinsic::experimental_constrained_fmuladd:
+  case Intrinsic::experimental_constrained_fadd:
+  case Intrinsic::experimental_constrained_fsub:
+  case Intrinsic::experimental_constrained_fmul:
+  case Intrinsic::experimental_constrained_fdiv:
+  case Intrinsic::experimental_constrained_frem:
   case Intrinsic::experimental_constrained_ceil:
   case Intrinsic::experimental_constrained_floor:
   case Intrinsic::experimental_constrained_round:
@@ -1854,6 +1861,56 @@ static bool getConstIntOrUndef(Value *Op, const APInt *&C) {
   return false;
 }
 
+/// Checks whether the given intrinsic call, which evaluates to a constant, is
+/// allowed to be folded.
+///
+/// \param CI Constrained intrinsic call.
+/// \param St Exception flags raised during constant evaluation.
+static bool mayFoldConstrained(ConstrainedFPIntrinsic *CI,
+                               APFloat::opStatus St) {
+  Optional<RoundingMode> ORM = CI->getRoundingMode();
+  Optional<fp::ExceptionBehavior> EB = CI->getExceptionBehavior();
+
+  // If the operation does not change exception status flags, it is safe
+  // to fold.
+  if (St == APFloat::opStatus::opOK) {
+    // When FP exceptions are not ignored, the intrinsic call will not be
+    // eliminated, because it is considered to have a side effect. But we know
+    // that its evaluation does not raise exceptions, so the side effect is
+    // absent. To allow removing the call, mark it as not accessing memory.
+    if (EB && *EB != fp::ExceptionBehavior::ebIgnore)
+      CI->addAttribute(AttributeList::FunctionIndex, Attribute::ReadNone);
+    return true;
+  }
+
+  // If the evaluation raised an FP exception, the result can depend on the
+  // rounding mode. If the latter is unknown, folding is not possible.
+  if (!ORM || *ORM == RoundingMode::Dynamic)
+    return false;
+
+  // If FP exceptions are ignored, fold the call, even if such an exception is
+  // raised.
+  if (!EB || *EB != fp::ExceptionBehavior::ebStrict)
+    return true;
+
+  // Leave the calculation to runtime so that the exception flags are set
+  // correctly in hardware.
+  return false;
+}
+
+/// Returns the rounding mode that should be used for constant evaluation.
+static RoundingMode
+getEvaluationRoundingMode(const ConstrainedFPIntrinsic *CI) {
+  Optional<RoundingMode> ORM = CI->getRoundingMode();
+  if (!ORM || *ORM == RoundingMode::Dynamic)
+    // Even if the rounding mode is unknown, try evaluating the operation.
+    // If it does not raise an inexact exception, rounding was not applied,
+    // so the result is exact and does not depend on the rounding mode. Whether
+    // other FP exceptions are raised does not depend on the rounding mode.
+    return RoundingMode::NearestTiesToEven;
+  return *ORM;
+}
+
 static Constant *ConstantFoldScalarCall1(StringRef Name,
                                          Intrinsic::ID IntrinsicID,
                                          Type *Ty,
@@ -2356,16 +2413,45 @@ static Constant *ConstantFoldScalarCall2(StringRef Name,
     }
   }
 
-  if (auto *Op1 = dyn_cast<ConstantFP>(Operands[0])) {
+  if (const auto *Op1 = dyn_cast<ConstantFP>(Operands[0])) {
     if (!Ty->isFloatingPointTy())
       return nullptr;
     APFloat Op1V = Op1->getValueAPF();
 
-    if (auto *Op2 = dyn_cast<ConstantFP>(Operands[1])) {
+    if (const auto *Op2 = dyn_cast<ConstantFP>(Operands[1])) {
       if (Op2->getType() != Op1->getType())
         return nullptr;
       APFloat Op2V = Op2->getValueAPF();
 
+      if (const auto *ConstrIntr = dyn_cast<ConstrainedFPIntrinsic>(Call)) {
+        RoundingMode RM = getEvaluationRoundingMode(ConstrIntr);
+        APFloat Res = Op1V;
+        APFloat::opStatus St;
+        switch (IntrinsicID) {
+        default:
+          return nullptr;
+        case Intrinsic::experimental_constrained_fadd:
+          St = Res.add(Op2V, RM);
+          break;
+        case Intrinsic::experimental_constrained_fsub:
+          St = Res.subtract(Op2V, RM);
+          break;
+        case Intrinsic::experimental_constrained_fmul:
+          St = Res.multiply(Op2V, RM);
+          break;
+        case Intrinsic::experimental_constrained_fdiv:
+          St = Res.divide(Op2V, RM);
+          break;
+        case Intrinsic::experimental_constrained_frem:
+          St = Res.mod(Op2V);
+          break;
+        }
+        if (mayFoldConstrained(const_cast<ConstrainedFPIntrinsic *>(ConstrIntr),
+                               St))
+          return ConstantFP::get(Ty->getContext(), Res);
+        return nullptr;
+      }
+
       switch (IntrinsicID) {
       default:
         break;
@@ -2437,6 +2523,8 @@ static Constant *ConstantFoldScalarCall2(StringRef Name,
         break;
       }
     } else if (auto *Op2C = dyn_cast<ConstantInt>(Operands[1])) {
+      if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy())
+        return nullptr;
       if (IntrinsicID == Intrinsic::powi && Ty->isHalfTy())
         return ConstantFP::get(
             Ty->getContext(),
@@ -2772,6 +2860,25 @@ static Constant *ConstantFoldScalarCall3(StringRef Name,
         const APFloat &C1 = Op1->getValueAPF();
         const APFloat &C2 = Op2->getValueAPF();
         const APFloat &C3 = Op3->getValueAPF();
+
+        if (const auto *ConstrIntr = dyn_cast<ConstrainedFPIntrinsic>(Call)) {
+          RoundingMode RM = getEvaluationRoundingMode(ConstrIntr);
+          APFloat Res = C1;
+          APFloat::opStatus St;
+          switch (IntrinsicID) {
+          default:
+            return nullptr;
+          case Intrinsic::experimental_constrained_fma:
+          case Intrinsic::experimental_constrained_fmuladd:
+            St = Res.fusedMultiplyAdd(C2, C3, RM);
+            break;
+          }
+          if (mayFoldConstrained(
+                  const_cast<ConstrainedFPIntrinsic *>(ConstrIntr), St))
+            return ConstantFP::get(Ty->getContext(), Res);
+          return nullptr;
+        }
+
         switch (IntrinsicID) {
         default: break;
         case Intrinsic::amdgcn_fma_legacy: {

diff  --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index d5e301bf5caf8..d95c053c25a10 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -491,6 +491,16 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I,
     if (isMathLibCallNoop(Call, TLI))
       return true;
 
+  // Constrained intrinsics are described as accessing memory in order to
+  // model their interaction with the floating-point environment. They thus
+  // appear to have a side effect, but actually do not unless they raise an
+  // FP exception. If FP exceptions are ignored, the intrinsic may be deleted.
+  if (auto *CI = dyn_cast<ConstrainedFPIntrinsic>(I)) {
+    Optional<fp::ExceptionBehavior> EB = CI->getExceptionBehavior();
+    if (!EB || *EB == fp::ExceptionBehavior::ebIgnore)
+      return true;
+  }
+
   return false;
 }
 

diff  --git a/llvm/test/Transforms/InstSimplify/constfold-constrained.ll b/llvm/test/Transforms/InstSimplify/constfold-constrained.ll
index 635bcd2f716a7..4db5fbff30aff 100644
--- a/llvm/test/Transforms/InstSimplify/constfold-constrained.ll
+++ b/llvm/test/Transforms/InstSimplify/constfold-constrained.ll
@@ -234,6 +234,186 @@ entry:
   ret double %result
 }
 
+define float @fadd_01() #0 {
+; CHECK-LABEL: @fadd_01(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret float 3.000000e+01
+;
+entry:
+  %result = call float @llvm.experimental.constrained.fadd.f32(float 1.000000e+01, float 2.000000e+01, metadata !"round.tonearest", metadata !"fpexcept.ignore") #0
+  ret float %result
+}
+
+; An inexact result does not prevent folding if exceptions are ignored and
+; the rounding mode is known.
+define double @fadd_02() #0 {
+; CHECK-LABEL: @fadd_02(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret double 2.000000e+00
+;
+entry:
+  %result = call double @llvm.experimental.constrained.fadd.f64(double 1.0, double 0x3FF0000000000001, metadata !"round.tonearest", metadata !"fpexcept.ignore") #0
+  ret double %result
+}
+
+define double @fadd_03() #0 {
+; CHECK-LABEL: @fadd_03(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret double 0x4000000000000001
+;
+entry:
+  %result = call double @llvm.experimental.constrained.fadd.f64(double 1.0, double 0x3FF0000000000001, metadata !"round.upward", metadata !"fpexcept.ignore") #0
+  ret double %result
+}
+
+; An inexact result prevents folding if exceptions may be checked.
+define double @fadd_04() #0 {
+; CHECK-LABEL: @fadd_04(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RESULT:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double 1.000000e+00, double 0x3FF0000000000001, metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR0]]
+; CHECK-NEXT:    ret double [[RESULT]]
+;
+entry:
+  %result = call double @llvm.experimental.constrained.fadd.f64(double 1.0, double 0x3FF0000000000001, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret double %result
+}
+
+; If the result is exact, folding is allowed even if exceptions may be checked.
+define double @fadd_05() #0 {
+; CHECK-LABEL: @fadd_05(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret double 3.000000e+00
+;
+entry:
+  %result = call double @llvm.experimental.constrained.fadd.f64(double 1.0, double 2.0, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret double %result
+}
+
+; A dynamic rounding mode does not prevent folding if the result is exact.
+define double @fadd_06() #0 {
+; CHECK-LABEL: @fadd_06(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret double 3.000000e+00
+;
+entry:
+  %result = call double @llvm.experimental.constrained.fadd.f64(double 1.0, double 2.0, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+  ret double %result
+}
+
+; An inexact result prevents folding if the rounding mode is unknown.
+define double @fadd_07() #0 {
+; CHECK-LABEL: @fadd_07(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RESULT:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double 1.000000e+00, double 0x3FF0000000000001, metadata !"round.dynamic", metadata !"fpexcept.ignore") #[[ATTR0]]
+; CHECK-NEXT:    ret double [[RESULT]]
+;
+entry:
+  %result = call double @llvm.experimental.constrained.fadd.f64(double 1.0, double 0x3FF0000000000001, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0
+  ret double %result
+}
+
+; An infinite result does not prevent folding unless exceptions are tracked.
+define double @fadd_08() #0 {
+; CHECK-LABEL: @fadd_08(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret double 0x7FF0000000000000
+;
+entry:
+  %result = call double @llvm.experimental.constrained.fadd.f64(double 0x7fEFFFFFFFFFFFFF, double 0x7fEFFFFFFFFFFFFF, metadata !"round.tonearest", metadata !"fpexcept.ignore") #0
+  ret double %result
+}
+
+define double @fadd_09() #0 {
+; CHECK-LABEL: @fadd_09(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RESULT:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF, metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR0]]
+; CHECK-NEXT:    ret double [[RESULT]]
+;
+entry:
+  %result = call double @llvm.experimental.constrained.fadd.f64(double 0x7fEFFFFFFFFFFFFF, double 0x7fEFFFFFFFFFFFFF, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret double %result
+}
+
+define half @fadd_10() #0 {
+; CHECK-LABEL: @fadd_10(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret half 0xH4200
+;
+entry:
+  %result = call half @llvm.experimental.constrained.fadd.f16(half 1.0, half 2.0, metadata !"round.tonearest", metadata !"fpexcept.ignore") #0
+  ret half %result
+}
+
+define bfloat @fadd_11() #0 {
+; CHECK-LABEL: @fadd_11(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret bfloat 0xR4040
+;
+entry:
+  %result = call bfloat @llvm.experimental.constrained.fadd.bf16(bfloat 1.0, bfloat 2.0, metadata !"round.tonearest", metadata !"fpexcept.ignore") #0
+  ret bfloat %result
+}
+
+define double @fsub_01() #0 {
+; CHECK-LABEL: @fsub_01(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret double -1.000000e+00
+;
+entry:
+  %result = call double @llvm.experimental.constrained.fsub.f64(double 1.0, double 2.0, metadata !"round.tonearest", metadata !"fpexcept.ignore") #0
+  ret double %result
+}
+
+define double @fmul_01() #0 {
+; CHECK-LABEL: @fmul_01(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret double 2.000000e+00
+;
+entry:
+  %result = call double @llvm.experimental.constrained.fmul.f64(double 1.0, double 2.0, metadata !"round.tonearest", metadata !"fpexcept.ignore") #0
+  ret double %result
+}
+
+define double @fdiv_01() #0 {
+; CHECK-LABEL: @fdiv_01(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret double 5.000000e-01
+;
+entry:
+  %result = call double @llvm.experimental.constrained.fdiv.f64(double 1.0, double 2.0, metadata !"round.tonearest", metadata !"fpexcept.ignore") #0
+  ret double %result
+}
+
+define double @frem_01() #0 {
+; CHECK-LABEL: @frem_01(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret double 1.000000e+00
+;
+entry:
+  %result = call double @llvm.experimental.constrained.frem.f64(double 1.0, double 2.0, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0
+  ret double %result
+}
+
+define double @fma_01() #0 {
+; CHECK-LABEL: @fma_01(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret double 5.000000e+00
+;
+entry:
+  %result = call double @llvm.experimental.constrained.fma.f64(double 1.0, double 2.0, double 3.0, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0
+  ret double %result
+}
+
+define double @fmuladd_01() #0 {
+; CHECK-LABEL: @fmuladd_01(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret double 5.000000e+00
+;
+entry:
+  %result = call double @llvm.experimental.constrained.fmuladd.f64(double 1.0, double 2.0, double 3.0, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0
+  ret double %result
+}
+
 
 attributes #0 = { strictfp }
 
@@ -243,4 +423,14 @@ declare double @llvm.experimental.constrained.ceil.f64(double, metadata)
 declare double @llvm.experimental.constrained.trunc.f64(double, metadata)
 declare double @llvm.experimental.constrained.round.f64(double, metadata)
 declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadata)
+declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata)
+declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
+declare half @llvm.experimental.constrained.fadd.f16(half, half, metadata, metadata)
+declare bfloat @llvm.experimental.constrained.fadd.bf16(bfloat, bfloat, metadata, metadata)
+declare double @llvm.experimental.constrained.fsub.f64(double, double, metadata, metadata)
+declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata)
+declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, metadata)
+declare double @llvm.experimental.constrained.frem.f64(double, double, metadata, metadata)
+declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata)
+declare double @llvm.experimental.constrained.fmuladd.f64(double, double, double, metadata, metadata)
 

diff  --git a/llvm/test/Transforms/InstSimplify/fdiv-strictfp.ll b/llvm/test/Transforms/InstSimplify/fdiv-strictfp.ll
index e833d1889e2fc..82101a4ef8286 100644
--- a/llvm/test/Transforms/InstSimplify/fdiv-strictfp.ll
+++ b/llvm/test/Transforms/InstSimplify/fdiv-strictfp.ll
@@ -12,14 +12,23 @@ define float @fdiv_constant_fold() #0 {
 
 define float @fdiv_constant_fold_strict() #0 {
 ; CHECK-LABEL: @fdiv_constant_fold_strict(
-; CHECK-NEXT:    [[F:%.*]] = call float @llvm.experimental.constrained.fdiv.f32(float 3.000000e+00, float 2.000000e+00, metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR0:[0-9]+]]
-; CHECK-NEXT:    ret float [[F]]
+; CHECK-NEXT:    ret float 1.500000e+00
 ;
   %f = call float @llvm.experimental.constrained.fdiv.f32(float 3.0, float 2.0, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
 
   ret float %f
 }
 
+define float @fdiv_constant_fold_strict2() #0 {
+; CHECK-LABEL: @fdiv_constant_fold_strict2(
+; CHECK-NEXT:    [[F:%.*]] = call float @llvm.experimental.constrained.fdiv.f32(float 2.000000e+00, float 3.000000e+00, metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR0:[0-9]+]]
+; CHECK-NEXT:    ret float [[F]]
+;
+  %f = call float @llvm.experimental.constrained.fdiv.f32(float 2.0, float 3.0, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+
+  ret float %f
+}
+
 define float @frem_constant_fold() #0 {
 ; CHECK-LABEL: @frem_constant_fold(
 ; CHECK-NEXT:    ret float 1.000000e+00
@@ -30,10 +39,9 @@ define float @frem_constant_fold() #0 {
 
 define float @frem_constant_fold_strict() #0 {
 ; CHECK-LABEL: @frem_constant_fold_strict(
-; CHECK-NEXT:    [[F:%.*]] = call float @llvm.experimental.constrained.fdiv.f32(float 3.000000e+00, float 2.000000e+00, metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR0]]
-; CHECK-NEXT:    ret float [[F]]
+; CHECK-NEXT:    ret float 1.000000e+00
 ;
-  %f = call float @llvm.experimental.constrained.fdiv.f32(float 3.0, float 2.0, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  %f = call float @llvm.experimental.constrained.frem.f32(float 3.0, float 2.0, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret float %f
 }
 


        

