[llvm] r359791 - [SelectionDAG] remove constant folding limitations based on FP exceptions

Thu May 2 07:48:00 PDT 2019

Author: spatel
Date: Thu May  2 07:47:59 2019
New Revision: 359791

URL: http://llvm.org/viewvc/llvm-project?rev=359791&view=rev
Log:
[SelectionDAG] remove constant folding limitations based on FP exceptions

We don't have FP exception limits in the IR constant folder for the binops (apart from strict ops),
so it does not make sense to have them here in the DAG either. Nothing else in the backend tries
to preserve exceptions (again outside of strict ops), so I don't see how this could have ever
worked for real code that cares about FP exceptions.

There are still cases (examples: unary opcodes in SDAG, FMA in IR) where we are trying (at least
partially) to preserve exceptions without even asking if the target supports FP exceptions. Those
should be corrected in subsequent patches.

Real support for FP exceptions requires several changes to handle the constrained/strict FP ops.

Differential Revision: https://reviews.llvm.org/D61331

Modified:
    llvm/trunk/include/llvm/CodeGen/TargetLowering.h
    llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
    llvm/trunk/lib/CodeGen/TargetLoweringBase.cpp
    llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
    llvm/trunk/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
    llvm/trunk/test/CodeGen/AArch64/fp-const-fold.ll

Modified: llvm/trunk/include/llvm/CodeGen/TargetLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/TargetLowering.h?rev=359791&r1=359790&r2=359791&view=diff
==============================================================================

--- llvm/trunk/include/llvm/CodeGen/TargetLowering.h (original)
+++ llvm/trunk/include/llvm/CodeGen/TargetLowering.h Thu May  2 07:47:59 2019
@@ -582,11 +582,6 @@ public:
     return false;
   }
 
-  /// Return true if target supports floating point exceptions.
-  bool hasFloatingPointExceptions() const {
-    return HasFloatingPointExceptions;
-  }
-
   /// Return true if target always beneficiates from combining into FMA for a
   /// given value type. This must typically return false on targets where FMA
   /// takes more cycles to execute than FADD.
@@ -1915,12 +1910,6 @@ protected:
   /// control.
   void setJumpIsExpensive(bool isExpensive = true);
 
-  /// Tells the code generator that this target supports floating point
-  /// exceptions and cares about preserving floating point exception behavior.
-  void setHasFloatingPointExceptions(bool FPExceptions = true) {
-    HasFloatingPointExceptions = FPExceptions;
-  }
-
   /// Tells the code generator which bitwidths to bypass.
   void addBypassSlowDiv(unsigned int SlowBitWidth, unsigned int FastBitWidth) {
     BypassSlowDivWidths[SlowBitWidth] = FastBitWidth;
@@ -2580,10 +2569,6 @@ private:
   /// predication.
   bool JumpIsExpensive;
 
-  /// Whether the target supports or cares about preserving floating point
-  /// exception behavior.
-  bool HasFloatingPointExceptions;
-
   /// This target prefers to use _setjmp to implement llvm.setjmp.
   ///
   /// Defaults to false.

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp?rev=359791&r1=359790&r2=359791&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Thu May  2 07:47:59 2019
@@ -4804,38 +4804,30 @@ SDValue SelectionDAG::FoldConstantVector
 
 SDValue SelectionDAG::foldConstantFPMath(unsigned Opcode, const SDLoc &DL,
                                          EVT VT, SDValue N1, SDValue N2) {
+  // TODO: We don't do any constant folding for strict FP opcodes here, but we
+  //       should. That will require dealing with a potentially non-default
+  //       rounding mode, checking the "opStatus" return value from the APFloat
+  //       math calculations, and possibly other variations.
   auto *N1CFP = dyn_cast<ConstantFPSDNode>(N1.getNode());
   auto *N2CFP = dyn_cast<ConstantFPSDNode>(N2.getNode());
-  bool HasFPExceptions = TLI->hasFloatingPointExceptions();
   if (N1CFP && N2CFP) {
     APFloat C1 = N1CFP->getValueAPF(), C2 = N2CFP->getValueAPF();
-    APFloat::opStatus Status;
     switch (Opcode) {
     case ISD::FADD:
-      Status = C1.add(C2, APFloat::rmNearestTiesToEven);
-      if (!HasFPExceptions || Status != APFloat::opInvalidOp)
-        return getConstantFP(C1, DL, VT);
-      break;
+      C1.add(C2, APFloat::rmNearestTiesToEven);
+      return getConstantFP(C1, DL, VT);
     case ISD::FSUB:
-      Status = C1.subtract(C2, APFloat::rmNearestTiesToEven);
-      if (!HasFPExceptions || Status != APFloat::opInvalidOp)
-        return getConstantFP(C1, DL, VT);
-      break;
+      C1.subtract(C2, APFloat::rmNearestTiesToEven);
+      return getConstantFP(C1, DL, VT);
     case ISD::FMUL:
-      Status = C1.multiply(C2, APFloat::rmNearestTiesToEven);
-      if (!HasFPExceptions || Status != APFloat::opInvalidOp)
-        return getConstantFP(C1, DL, VT);
-      break;
+      C1.multiply(C2, APFloat::rmNearestTiesToEven);
+      return getConstantFP(C1, DL, VT);
     case ISD::FDIV:
-      Status = C1.divide(C2, APFloat::rmNearestTiesToEven);
-      if (!HasFPExceptions || Status != APFloat::opInvalidOp)
-        return getConstantFP(C1, DL, VT);
-      break;
+      C1.divide(C2, APFloat::rmNearestTiesToEven);
+      return getConstantFP(C1, DL, VT);
     case ISD::FREM:
-      Status = C1.mod(C2);
-      if (!HasFPExceptions || Status != APFloat::opInvalidOp)
-        return getConstantFP(C1, DL, VT);
-      break;
+      C1.mod(C2);
+      return getConstantFP(C1, DL, VT);
     case ISD::FCOPYSIGN:
       C1.copySign(C2);
       return getConstantFP(C1, DL, VT);
@@ -5311,10 +5303,8 @@ SDValue SelectionDAG::getNode(unsigned O
       APFloat  V1 = N1CFP->getValueAPF();
       const APFloat &V2 = N2CFP->getValueAPF();
       const APFloat &V3 = N3CFP->getValueAPF();
-      APFloat::opStatus s =
-        V1.fusedMultiplyAdd(V2, V3, APFloat::rmNearestTiesToEven);
-      if (!TLI->hasFloatingPointExceptions() || s != APFloat::opInvalidOp)
-        return getConstantFP(V1, DL, VT);
+      V1.fusedMultiplyAdd(V2, V3, APFloat::rmNearestTiesToEven);
+      return getConstantFP(V1, DL, VT);
     }
     break;
   }

Modified: llvm/trunk/lib/CodeGen/TargetLoweringBase.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/TargetLoweringBase.cpp?rev=359791&r1=359790&r2=359791&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/TargetLoweringBase.cpp (original)
+++ llvm/trunk/lib/CodeGen/TargetLoweringBase.cpp Thu May  2 07:47:59 2019
@@ -545,7 +545,6 @@ TargetLoweringBase::TargetLoweringBase(c
   JumpIsExpensive = JumpIsExpensiveOverride;
   PredictableSelectIsExpensive = false;
   EnableExtLdPromotion = false;
-  HasFloatingPointExceptions = true;
   StackPointerRegisterToSaveRestore = 0;
   BooleanContents = UndefinedBooleanContent;
   BooleanFloatContents = UndefinedBooleanContent;

Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=359791&r1=359790&r2=359791&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Thu May  2 07:47:59 2019
@@ -729,11 +729,6 @@ SITargetLowering::SITargetLowering(const
   setTargetDAGCombine(ISD::ATOMIC_LOAD_FADD);
 
   setSchedulingPreference(Sched::RegPressure);
-
-  // SI at least has hardware support for floating point exceptions, but no way
-  // of using or handling them is implemented. They are also optional in OpenCL
-  // (Section 7.3)
-  setHasFloatingPointExceptions(Subtarget->hasFPExceptions());
 }
 
 const GCNSubtarget *SITargetLowering::getSubtarget() const {

Modified: llvm/trunk/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp?rev=359791&r1=359790&r2=359791&view=diff
==============================================================================
--- llvm/trunk/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp Thu May  2 07:47:59 2019
@@ -45,9 +45,6 @@ WebAssemblyTargetLowering::WebAssemblyTa
   setBooleanContents(ZeroOrOneBooleanContent);
   // Except in SIMD vectors
   setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
-  // WebAssembly does not produce floating-point exceptions on normal floating
-  // point operations.
-  setHasFloatingPointExceptions(false);
   // We don't know the microarchitecture here, so just reduce register pressure.
   setSchedulingPreference(Sched::RegPressure);
   // Tell ISel that we have a stack pointer.

Modified: llvm/trunk/test/CodeGen/AArch64/fp-const-fold.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/fp-const-fold.ll?rev=359791&r1=359790&r2=359791&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/fp-const-fold.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/fp-const-fold.ll Thu May  2 07:47:59 2019
@@ -18,10 +18,9 @@ define double @constant_fold_fdiv_by_zer
 define double @constant_fold_frem_by_zero(double* %p) {
 ; CHECK-LABEL: constant_fold_frem_by_zero:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov x8, #1
-; CHECK-NEXT:    fmov d1, xzr
+; CHECK-NEXT:    mov x8, #9221120237041090560
 ; CHECK-NEXT:    fmov d0, x8
-; CHECK-NEXT:    b fmod
+; CHECK-NEXT:    ret
   %r = frem double 4.940660e-324, 0.0
   ret double %r
 }
@@ -31,10 +30,8 @@ define double @constant_fold_frem_by_zer
 define double @constant_fold_fmul_nan(double* %p) {
 ; CHECK-LABEL: constant_fold_fmul_nan:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov x8, #9218868437227405312
-; CHECK-NEXT:    fmov d0, xzr
-; CHECK-NEXT:    fmov d1, x8
-; CHECK-NEXT:    fmul d0, d1, d0
+; CHECK-NEXT:    mov x8, #9221120237041090560
+; CHECK-NEXT:    fmov d0, x8
 ; CHECK-NEXT:    ret
   %r = fmul double 0x7ff0000000000000, 0.0
   ret double %r
@@ -45,11 +42,8 @@ define double @constant_fold_fmul_nan(do
 define double @constant_fold_fadd_nan(double* %p) {
 ; CHECK-LABEL: constant_fold_fadd_nan:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov x8, #-4503599627370496
-; CHECK-NEXT:    mov x9, #9218868437227405312
+; CHECK-NEXT:    mov x8, #9221120237041090560
 ; CHECK-NEXT:    fmov d0, x8
-; CHECK-NEXT:    fmov d1, x9
-; CHECK-NEXT:    fadd d0, d1, d0
 ; CHECK-NEXT:    ret
   %r = fadd double 0x7ff0000000000000, 0xfff0000000000000
   ret double %r
@@ -60,9 +54,8 @@ define double @constant_fold_fadd_nan(do
 define double @constant_fold_fsub_nan(double* %p) {
 ; CHECK-LABEL: constant_fold_fsub_nan:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov x8, #9218868437227405312
+; CHECK-NEXT:    mov x8, #9221120237041090560
 ; CHECK-NEXT:    fmov d0, x8
-; CHECK-NEXT:    fsub d0, d0, d0
 ; CHECK-NEXT:    ret
   %r = fsub double 0x7ff0000000000000, 0x7ff0000000000000
   ret double %r
@@ -73,12 +66,8 @@ define double @constant_fold_fsub_nan(do
 define double @constant_fold_fma_nan(double* %p) {
 ; CHECK-LABEL: constant_fold_fma_nan:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov x8, #4631107791820423168
-; CHECK-NEXT:    mov x9, #9218868437227405312
-; CHECK-NEXT:    fmov d0, xzr
-; CHECK-NEXT:    fmov d1, x8
-; CHECK-NEXT:    fmov d2, x9
-; CHECK-NEXT:    fmadd d0, d2, d0, d1
+; CHECK-NEXT:    mov x8, #9221120237041090560
+; CHECK-NEXT:    fmov d0, x8
 ; CHECK-NEXT:    ret
   %r =  call double @llvm.fma.f64(double 0x7ff0000000000000, double 0.0, double 42.0)
   ret double %r