[llvm] r291624 - InstSimplify: Eliminate fabs on known positive

Tue Jan 10 16:33:25 PST 2017

Author: arsenm
Date: Tue Jan 10 18:33:24 2017
New Revision: 291624

URL: http://llvm.org/viewvc/llvm-project?rev=291624&view=rev
Log:
InstSimplify: Eliminate fabs on known positive

Modified:
    llvm/trunk/include/llvm/Analysis/ValueTracking.h
    llvm/trunk/lib/Analysis/InstructionSimplify.cpp
    llvm/trunk/lib/Analysis/ValueTracking.cpp
    llvm/trunk/test/Transforms/InstCombine/fabs.ll
    llvm/trunk/test/Transforms/InstCombine/fast-math.ll
    llvm/trunk/test/Transforms/InstSimplify/floating-point-arithmetic.ll

Modified: llvm/trunk/include/llvm/Analysis/ValueTracking.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/ValueTracking.h?rev=291624&r1=291623&r2=291624&view=diff
==============================================================================

--- llvm/trunk/include/llvm/Analysis/ValueTracking.h (original)
+++ llvm/trunk/include/llvm/Analysis/ValueTracking.h Tue Jan 10 18:33:24 2017
@@ -169,8 +169,12 @@ template <typename T> class ArrayRef;
 
   /// Return true if we can prove that the specified FP value is either a NaN or
   /// never less than 0.0.
-  bool CannotBeOrderedLessThanZero(const Value *V, const TargetLibraryInfo *TLI,
-                                   unsigned Depth = 0);
+  /// -0.0 is not considered less than 0.0 here since the two compare equal.
+  bool CannotBeOrderedLessThanZero(const Value *V, const TargetLibraryInfo *TLI);
+
+  /// \returns true if we can prove that the specified FP value has a 0 sign
+  /// bit.
+  bool SignBitMustBeZero(const Value *V, const TargetLibraryInfo *TLI);
 
   /// If the specified value can be set by repeating the same byte in memory,
   /// return the i8 value that it is represented with. This is true for all i8

Modified: llvm/trunk/lib/Analysis/InstructionSimplify.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/InstructionSimplify.cpp?rev=291624&r1=291623&r2=291624&view=diff
==============================================================================
--- llvm/trunk/lib/Analysis/InstructionSimplify.cpp (original)
+++ llvm/trunk/lib/Analysis/InstructionSimplify.cpp Tue Jan 10 18:33:24 2017
@@ -4308,10 +4308,21 @@ static Value *SimplifyIntrinsic(Function
     return nullptr;
 
   // Unary Ops
-  if (NumOperands == 1)
-    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(*ArgBegin))
+  if (NumOperands == 1) {
+    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(*ArgBegin)) {
       if (II->getIntrinsicID() == IID)
         return II;
+    }
+
+    switch (IID) {
+    case Intrinsic::fabs: {
+      if (SignBitMustBeZero(*ArgBegin, Q.TLI))
+        return *ArgBegin;
+    }
+    default:
+      break;
+    }
+  }
 
   return nullptr;
 }

Modified: llvm/trunk/lib/Analysis/ValueTracking.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/ValueTracking.cpp?rev=291624&r1=291623&r2=291624&view=diff
==============================================================================
--- llvm/trunk/lib/Analysis/ValueTracking.cpp (original)
+++ llvm/trunk/lib/Analysis/ValueTracking.cpp Tue Jan 10 18:33:24 2017
@@ -2580,51 +2580,70 @@ bool llvm::CannotBeNegativeZero(const Va
   return false;
 }
 
-bool llvm::CannotBeOrderedLessThanZero(const Value *V,
-                                       const TargetLibraryInfo *TLI,
-                                       unsigned Depth) {
-  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V))
-    return !CFP->getValueAPF().isNegative() || CFP->getValueAPF().isZero();
+/// If \p SignBitOnly is true, test for a known 0 sign bit rather than a
+/// standard ordered compare. e.g. make -0.0 olt 0.0 be true because of the sign
+/// bit despite comparing equal.
+static bool cannotBeOrderedLessThanZeroImpl(const Value *V,
+                                            const TargetLibraryInfo *TLI,
+                                            bool SignBitOnly,
+                                            unsigned Depth) {
+  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
+    return !CFP->getValueAPF().isNegative() ||
+           (!SignBitOnly && CFP->getValueAPF().isZero());
+  }
 
   if (Depth == MaxDepth)
-    return false;  // Limit search depth.
+    return false; // Limit search depth.
 
   const Operator *I = dyn_cast<Operator>(V);
-  if (!I) return false;
+  if (!I)
+    return false;
 
   switch (I->getOpcode()) {
-  default: break;
+  default:
+    break;
   // Unsigned integers are always nonnegative.
   case Instruction::UIToFP:
     return true;
   case Instruction::FMul:
     // x*x is always non-negative or a NaN.
-    if (I->getOperand(0) == I->getOperand(1))
+    if (I->getOperand(0) == I->getOperand(1) &&
+        (!SignBitOnly || cast<FPMathOperator>(I)->hasNoNaNs()))
       return true;
+
     LLVM_FALLTHROUGH;
   case Instruction::FAdd:
   case Instruction::FDiv:
   case Instruction::FRem:
-    return CannotBeOrderedLessThanZero(I->getOperand(0), TLI, Depth + 1) &&
-           CannotBeOrderedLessThanZero(I->getOperand(1), TLI, Depth + 1);
+    return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
+                                           Depth + 1) &&
+           cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly,
+                                           Depth + 1);
   case Instruction::Select:
-    return CannotBeOrderedLessThanZero(I->getOperand(1), TLI, Depth + 1) &&
-           CannotBeOrderedLessThanZero(I->getOperand(2), TLI, Depth + 1);
+    return cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly,
+                                           Depth + 1) &&
+           cannotBeOrderedLessThanZeroImpl(I->getOperand(2), TLI, SignBitOnly,
+                                           Depth + 1);
   case Instruction::FPExt:
   case Instruction::FPTrunc:
     // Widening/narrowing never change sign.
-    return CannotBeOrderedLessThanZero(I->getOperand(0), TLI, Depth + 1);
+    return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
+                                           Depth + 1);
   case Instruction::Call:
     Intrinsic::ID IID = getIntrinsicForCallSite(cast<CallInst>(I), TLI);
     switch (IID) {
     default:
       break;
     case Intrinsic::maxnum:
-      return CannotBeOrderedLessThanZero(I->getOperand(0), TLI, Depth + 1) ||
-             CannotBeOrderedLessThanZero(I->getOperand(1), TLI, Depth + 1);
+      return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
+                                             Depth + 1) ||
+             cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly,
+                                             Depth + 1);
     case Intrinsic::minnum:
-      return CannotBeOrderedLessThanZero(I->getOperand(0), TLI, Depth + 1) &&
-             CannotBeOrderedLessThanZero(I->getOperand(1), TLI, Depth + 1);
+      return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
+                                             Depth + 1) &&
+             cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly,
+                                             Depth + 1);
     case Intrinsic::exp:
     case Intrinsic::exp2:
     case Intrinsic::fabs:
@@ -2636,18 +2655,30 @@ bool llvm::CannotBeOrderedLessThanZero(c
         if (CI->getBitWidth() <= 64 && CI->getSExtValue() % 2u == 0)
           return true;
       }
-      return CannotBeOrderedLessThanZero(I->getOperand(0), TLI, Depth + 1);
+      return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
+                                             Depth + 1);
     case Intrinsic::fma:
     case Intrinsic::fmuladd:
       // x*x+y is non-negative if y is non-negative.
       return I->getOperand(0) == I->getOperand(1) &&
-             CannotBeOrderedLessThanZero(I->getOperand(2), TLI, Depth + 1);
+             (!SignBitOnly || cast<FPMathOperator>(I)->hasNoNaNs()) &&
+             cannotBeOrderedLessThanZeroImpl(I->getOperand(2), TLI, SignBitOnly,
+                                             Depth + 1);
     }
     break;
   }
   return false;
 }
 
+bool llvm::CannotBeOrderedLessThanZero(const Value *V,
+                                       const TargetLibraryInfo *TLI) {
+  return cannotBeOrderedLessThanZeroImpl(V, TLI, false, 0);
+}
+
+bool llvm::SignBitMustBeZero(const Value *V, const TargetLibraryInfo *TLI) {
+  return cannotBeOrderedLessThanZeroImpl(V, TLI, true, 0);
+}
+
 /// If the specified value can be set by repeating the same byte in memory,
 /// return the i8 value that it is represented with.  This is
 /// true for all i8 values obviously, but is also true for i32 0, i32 -1,

Modified: llvm/trunk/test/Transforms/InstCombine/fabs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/fabs.ll?rev=291624&r1=291623&r2=291624&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/fabs.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/fabs.ll Tue Jan 10 18:33:24 2017
@@ -5,6 +5,8 @@
 declare float @fabsf(float)
 declare double @fabs(double)
 declare fp128 @fabsl(fp128)
+declare float @llvm.fma.f32(float, float, float)
+declare float @llvm.fmuladd.f32(float, float, float)
 
 define float @square_fabs_call_f32(float %x) {
   %mul = fmul float %x, %x
@@ -80,7 +82,6 @@ define fp128 @square_fabs_intrinsic_f128
 ; CHECK-NEXT: ret fp128 %fabsl
 }
 
-; TODO: This should be able to elimnated the fabs
 define float @square_nnan_fabs_intrinsic_f32(float %x) {
   %mul = fmul nnan float %x, %x
   %fabsf = call float @llvm.fabs.f32(float %mul)
@@ -88,8 +89,7 @@ define float @square_nnan_fabs_intrinsic
 
 ; CHECK-LABEL: square_nnan_fabs_intrinsic_f32(
 ; CHECK-NEXT: %mul = fmul nnan float %x, %x
-; CHECK-NEXT: %fabsf = call float @llvm.fabs.f32(float %mul)
-; CHECK-NEXT: ret float %fabsf
+; CHECK-NEXT: ret float %mul
 }
 
 ; Shrinking a library call to a smaller type should not be inhibited by nor inhibit the square optimization.
@@ -170,3 +170,47 @@ define float @fabs_select_var_constant_n
   %fabs = call float @llvm.fabs.f32(float %select)
   ret float %fabs
 }
+
+; The fabs cannot be eliminated because %x may be a NaN
+define float @square_fma_fabs_intrinsic_f32(float %x) {
+  %fma = call float @llvm.fma.f32(float %x, float %x, float 1.0)
+  %fabsf = call float @llvm.fabs.f32(float %fma)
+  ret float %fabsf
+
+; CHECK-LABEL: @square_fma_fabs_intrinsic_f32(
+; CHECK-NEXT: %fma = call float @llvm.fma.f32(float %x, float %x, float 1.000000e+00)
+; CHECK-NEXT: %fabsf = call float @llvm.fabs.f32(float %fma)
+; CHECK-NEXT: ret float %fabsf
+}
+
+; The fabs can be eliminated because nnan rules out a NaN result
+define float @square_nnan_fma_fabs_intrinsic_f32(float %x) {
+  %fma = call nnan float @llvm.fma.f32(float %x, float %x, float 1.0)
+  %fabsf = call float @llvm.fabs.f32(float %fma)
+  ret float %fabsf
+
+; CHECK-LABEL: @square_nnan_fma_fabs_intrinsic_f32(
+; CHECK-NEXT: %fma = call nnan float @llvm.fma.f32(float %x, float %x, float 1.000000e+00)
+; CHECK-NEXT: ret float %fma
+}
+
+define float @square_fmuladd_fabs_intrinsic_f32(float %x) {
+  %fmuladd = call float @llvm.fmuladd.f32(float %x, float %x, float 1.0)
+  %fabsf = call float @llvm.fabs.f32(float %fmuladd)
+  ret float %fabsf
+
+; CHECK-LABEL: @square_fmuladd_fabs_intrinsic_f32(
+; CHECK-NEXT: %fmuladd = call float @llvm.fmuladd.f32(float %x, float %x, float 1.000000e+00)
+; CHECK-NEXT: %fabsf = call float @llvm.fabs.f32(float %fmuladd)
+; CHECK-NEXT: ret float %fabsf
+}
+
+define float @square_nnan_fmuladd_fabs_intrinsic_f32(float %x) {
+  %fmuladd = call nnan float @llvm.fmuladd.f32(float %x, float %x, float 1.0)
+  %fabsf = call float @llvm.fabs.f32(float %fmuladd)
+  ret float %fabsf
+
+; CHECK-LABEL: @square_nnan_fmuladd_fabs_intrinsic_f32(
+; CHECK-NEXT: %fmuladd = call nnan float @llvm.fmuladd.f32(float %x, float %x, float 1.000000e+00)
+; CHECK-NEXT: ret float %fmuladd
+}

Modified: llvm/trunk/test/Transforms/InstCombine/fast-math.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/fast-math.ll?rev=291624&r1=291623&r2=291624&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/fast-math.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/fast-math.ll Tue Jan 10 18:33:24 2017
@@ -241,7 +241,7 @@ define float @fmul2(float %f1) {
 ; X/C1 * C2 => X * (C2/C1) is disabled if X/C1 has multiple uses
 @fmul2_external = external global float
 define float @fmul2_disable(float %f1) {
-  %div = fdiv fast float 1.000000e+00, %f1 
+  %div = fdiv fast float 1.000000e+00, %f1
   store float %div, float* @fmul2_external
   %mul = fmul fast float %div, 2.000000e+00
   ret float %mul
@@ -672,8 +672,7 @@ define double @sqrt_intrinsic_arg_4th(do
 
 ; CHECK-LABEL: sqrt_intrinsic_arg_4th(
 ; CHECK-NEXT: %mul = fmul fast double %x, %x
-; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %mul)
-; CHECK-NEXT: ret double %fabs
+; CHECK-NEXT: ret double %mul
 }
 
 define double @sqrt_intrinsic_arg_5th(double %x) {
@@ -685,9 +684,8 @@ define double @sqrt_intrinsic_arg_5th(do
 
 ; CHECK-LABEL: sqrt_intrinsic_arg_5th(
 ; CHECK-NEXT: %mul = fmul fast double %x, %x
-; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %mul)
 ; CHECK-NEXT: %sqrt1 = call fast double @llvm.sqrt.f64(double %x)
-; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
+; CHECK-NEXT: %1 = fmul fast double %mul, %sqrt1
 ; CHECK-NEXT: ret double %1
 }
 

Modified: llvm/trunk/test/Transforms/InstSimplify/floating-point-arithmetic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstSimplify/floating-point-arithmetic.ll?rev=291624&r1=291623&r2=291624&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstSimplify/floating-point-arithmetic.ll (original)
+++ llvm/trunk/test/Transforms/InstSimplify/floating-point-arithmetic.ll Tue Jan 10 18:33:24 2017
@@ -103,3 +103,95 @@ define float @PR22688(float %x) {
   ret float %7
 }
 
+declare float @llvm.fabs.f32(float)
+
+; CHECK-LABEL: @fabs_select_positive_constants(
+; CHECK: %select = select i1 %cmp, float 1.000000e+00, float 2.000000e+00
+; CHECK-NEXT: ret float %select
+define float @fabs_select_positive_constants(i32 %c) {
+  %cmp = icmp eq i32 %c, 0
+  %select = select i1 %cmp, float 1.0, float 2.0
+  %fabs = call float @llvm.fabs.f32(float %select)
+  ret float %fabs
+}
+
+; CHECK-LABEL: @fabs_select_constant_variable(
+; CHECK: %select = select i1 %cmp, float 1.000000e+00, float %x
+; CHECK-NEXT: %fabs = call float @llvm.fabs.f32(float %select)
+define float @fabs_select_constant_variable(i32 %c, float %x) {
+  %cmp = icmp eq i32 %c, 0
+  %select = select i1 %cmp, float 1.0, float %x
+  %fabs = call float @llvm.fabs.f32(float %select)
+  ret float %fabs
+}
+
+; CHECK-LABEL: @fabs_select_neg0_pos0(
+; CHECK: %select = select i1 %cmp, float -0.000000e+00, float 0.000000e+00
+; CHECK: %fabs = call float @llvm.fabs.f32(float %select)
+; CHECK-NEXT: ret float %fabs
+define float @fabs_select_neg0_pos0(float addrspace(1)* %out, i32 %c) {
+  %cmp = icmp eq i32 %c, 0
+  %select = select i1 %cmp, float -0.0, float 0.0
+  %fabs = call float @llvm.fabs.f32(float %select)
+  ret float %fabs
+}
+
+; CHECK-LABEL: @fabs_select_neg0_neg1(
+; CHECK: %select = select i1 %cmp, float -0.000000e+00, float -1.000000e+00
+; CHECK: %fabs = call float @llvm.fabs.f32(float %select)
+define float @fabs_select_neg0_neg1(float addrspace(1)* %out, i32 %c) {
+  %cmp = icmp eq i32 %c, 0
+  %select = select i1 %cmp, float -0.0, float -1.0
+  %fabs = call float @llvm.fabs.f32(float %select)
+  ret float %fabs
+}
+
+; CHECK-LABEL: @fabs_select_nan_nan(
+; CHECK: %select = select i1 %cmp, float 0x7FF8000000000000, float 0x7FF8000100000000
+; CHECK-NEXT: ret float %select
+define float @fabs_select_nan_nan(float addrspace(1)* %out, i32 %c) {
+  %cmp = icmp eq i32 %c, 0
+  %select = select i1 %cmp, float 0x7FF8000000000000, float 0x7FF8000100000000
+  %fabs = call float @llvm.fabs.f32(float %select)
+  ret float %fabs
+}
+
+; CHECK-LABEL: @fabs_select_negnan_nan(
+; CHECK: %select = select i1 %cmp, float 0xFFF8000000000000, float 0x7FF8000000000000
+; CHECK: %fabs = call float @llvm.fabs.f32(float %select)
+define float @fabs_select_negnan_nan(float addrspace(1)* %out, i32 %c) {
+  %cmp = icmp eq i32 %c, 0
+  %select = select i1 %cmp, float 0xFFF8000000000000, float 0x7FF8000000000000
+  %fabs = call float @llvm.fabs.f32(float %select)
+  ret float %fabs
+}
+
+; CHECK-LABEL: @fabs_select_negnan_negnan(
+; CHECK:  %select = select i1 %cmp, float 0xFFF8000000000000, float 0x7FF8000100000000
+; CHECK: %fabs = call float @llvm.fabs.f32(float %select)
+define float @fabs_select_negnan_negnan(float addrspace(1)* %out, i32 %c) {
+  %cmp = icmp eq i32 %c, 0
+  %select = select i1 %cmp, float 0xFFF8000000000000, float 0x7FF8000100000000
+  %fabs = call float @llvm.fabs.f32(float %select)
+  ret float %fabs
+}
+
+; CHECK-LABEL: @fabs_select_negnan_negzero(
+; CHECK: %select = select i1 %cmp, float 0xFFF8000000000000, float -0.000000e+00
+; CHECK: %fabs = call float @llvm.fabs.f32(float %select)
+define float @fabs_select_negnan_negzero(float addrspace(1)* %out, i32 %c) {
+  %cmp = icmp eq i32 %c, 0
+  %select = select i1 %cmp, float 0xFFF8000000000000, float -0.0
+  %fabs = call float @llvm.fabs.f32(float %select)
+  ret float %fabs
+}
+
+; CHECK-LABEL: @fabs_select_negnan_zero(
+; CHECK: %select = select i1 %cmp, float 0xFFF8000000000000, float 0.000000e+00
+; CHECK: %fabs = call float @llvm.fabs.f32(float %select)
+define float @fabs_select_negnan_zero(float addrspace(1)* %out, i32 %c) {
+  %cmp = icmp eq i32 %c, 0
+  %select = select i1 %cmp, float 0xFFF8000000000000, float 0.0
+  %fabs = call float @llvm.fabs.f32(float %select)
+  ret float %fabs
+}