[llvm] r271573 - transform obscured FP sign bit ops into a fabs/fneg using TLI hook

Thu Jun 2 13:01:38 PDT 2016

Author: spatel
Date: Thu Jun  2 15:01:37 2016
New Revision: 271573

URL: http://llvm.org/viewvc/llvm-project?rev=271573&view=rev
Log:
transform obscured FP sign bit ops into a fabs/fneg using TLI hook

This is effectively a revert of:
http://reviews.llvm.org/rL249702 - [InstCombine] transform masking off of an FP sign bit into a fabs() intrinsic call (PR24886)
and:
http://reviews.llvm.org/rL249701 - [ValueTracking] teach computeKnownBits that a fabs() clears sign bits
and a reimplementation as a DAG combine for targets that have IEEE754-compliant fabs/fneg instructions.

This is intended to resolve the objections raised on the dev list:
http://lists.llvm.org/pipermail/llvm-dev/2016-April/098154.html
and:
https://llvm.org/bugs/show_bug.cgi?id=24886#c4

In the interest of patch minimalism, I've only partly enabled AArch64. PowerPC, MIPS, x86 and others can enable later.

Differential Revision: http://reviews.llvm.org/D19391


Modified:
    llvm/trunk/include/llvm/Target/TargetLowering.h
    llvm/trunk/lib/Analysis/ValueTracking.cpp
    llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h
    llvm/trunk/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
    llvm/trunk/test/CodeGen/AArch64/fcvt-int.ll
    llvm/trunk/test/Transforms/InstCombine/and2.ll
    llvm/trunk/test/Transforms/InstCombine/fabs.ll

Modified: llvm/trunk/include/llvm/Target/TargetLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetLowering.h?rev=271573&r1=271572&r2=271573&view=diff
==============================================================================

--- llvm/trunk/include/llvm/Target/TargetLowering.h (original)
+++ llvm/trunk/include/llvm/Target/TargetLowering.h Thu Jun  2 15:01:37 2016
@@ -319,6 +319,14 @@ public:
     return false;
   }
 
+  /// Return true if it is safe to transform an integer-domain bitwise operation
+  /// into the equivalent floating-point operation. This should be set to true
+  /// if the target has IEEE-754-compliant fabs/fneg operations for the input
+  /// type.
+  virtual bool hasBitPreservingFPLogic(EVT VT) const {
+    return false;
+  }
+
   /// \brief Return if the target supports combining a
   /// chain like:
   /// \code

Modified: llvm/trunk/lib/Analysis/ValueTracking.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/ValueTracking.cpp?rev=271573&r1=271572&r2=271573&view=diff
==============================================================================
--- llvm/trunk/lib/Analysis/ValueTracking.cpp (original)
+++ llvm/trunk/lib/Analysis/ValueTracking.cpp Thu Jun  2 15:01:37 2016
@@ -991,8 +991,7 @@ static void computeKnownBitsFromOperator
   }
   case Instruction::BitCast: {
     Type *SrcTy = I->getOperand(0)->getType();
-    if ((SrcTy->isIntegerTy() || SrcTy->isPointerTy() ||
-         SrcTy->isFloatingPointTy()) &&
+    if ((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
         // TODO: For now, not handling conversions like:
         // (bitcast i64 %x to <2 x i32>)
         !I->getType()->isVectorTy()) {
@@ -1316,12 +1315,6 @@ static void computeKnownBitsFromOperator
         // of bits which might be set provided by popcnt KnownOne2.
         break;
       }
-      case Intrinsic::fabs: {
-        Type *Ty = II->getType();
-        APInt SignBit = APInt::getSignBit(Ty->getScalarSizeInBits());
-        KnownZero |= APInt::getSplat(Ty->getPrimitiveSizeInBits(), SignBit);
-        break;
-      }
       case Intrinsic::x86_sse42_crc32_64_64:
         KnownZero |= APInt::getHighBitsSet(64, 32);
         break;
@@ -1381,9 +1374,8 @@ void computeKnownBits(Value *V, APInt &K
   unsigned BitWidth = KnownZero.getBitWidth();
 
   assert((V->getType()->isIntOrIntVectorTy() ||
-          V->getType()->isFPOrFPVectorTy() ||
           V->getType()->getScalarType()->isPointerTy()) &&
-         "Not integer, floating point, or pointer type!");
+         "Not integer or pointer type!");
   assert((Q.DL.getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth) &&
          (!V->getType()->isIntOrIntVectorTy() ||
           V->getType()->getScalarSizeInBits() == BitWidth) &&

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=271573&r1=271572&r2=271573&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Thu Jun  2 15:01:37 2016
@@ -7351,6 +7351,49 @@ static unsigned getPPCf128HiElementSelec
   return DAG.getDataLayout().isBigEndian() ? 1 : 0;
 }
 
+static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
+                                    const TargetLowering &TLI) {
+  // If this is not a bitcast to an FP type or if the target doesn't have
+  // IEEE754-compliant FP logic, we're done.
+  EVT VT = N->getValueType(0);
+  if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
+    return SDValue();
+
+  // TODO: Use splat values for the constant-checking below and remove this
+  // restriction.
+  SDValue N0 = N->getOperand(0);
+  EVT SourceVT = N0.getValueType();
+  if (SourceVT.isVector())
+    return SDValue();
+
+  unsigned FPOpcode;
+  APInt SignMask;
+  switch (N0.getOpcode()) {
+  case ISD::AND:
+    FPOpcode = ISD::FABS;
+    SignMask = ~APInt::getSignBit(SourceVT.getSizeInBits());
+    break;
+  case ISD::XOR:
+    FPOpcode = ISD::FNEG;
+    SignMask = APInt::getSignBit(SourceVT.getSizeInBits());
+    break;
+  // TODO: ISD::OR --> ISD::FNABS?
+  default:
+    return SDValue();
+  }
+
+  // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
+  // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
+  SDValue LogicOp0 = N0.getOperand(0);
+  ConstantSDNode *LogicOp1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+  if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
+      LogicOp0.getOpcode() == ISD::BITCAST &&
+      LogicOp0->getOperand(0).getValueType() == VT)
+    return DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0->getOperand(0));
+
+  return SDValue();
+}
+
 SDValue DAGCombiner::visitBITCAST(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   EVT VT = N->getValueType(0);
@@ -7415,6 +7458,9 @@ SDValue DAGCombiner::visitBITCAST(SDNode
     }
   }
 
+  if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
+    return V;
+
   // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
   // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
   //

Modified: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h?rev=271573&r1=271572&r2=271573&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h Thu Jun  2 15:01:37 2016
@@ -395,6 +395,12 @@ public:
   bool isCheapToSpeculateCtlz() const override {
     return true;
   }
+
+  bool hasBitPreservingFPLogic(EVT VT) const override {
+    // FIXME: Is this always true? It should be true for vectors at least.
+    return VT == MVT::f32 || VT == MVT::f64;
+  }
+
   bool supportSplitCSR(MachineFunction *MF) const override {
     return MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS &&
            MF->getFunction()->hasFnAttribute(Attribute::NoUnwind);

Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp?rev=271573&r1=271572&r2=271573&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp Thu Jun  2 15:01:37 2016
@@ -1594,24 +1594,6 @@ Instruction *InstCombiner::visitAnd(Bina
   if (Instruction *CastedAnd = foldCastedBitwiseLogic(I))
     return CastedAnd;
 
-  if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
-    Value *Op0COp = Op0C->getOperand(0);
-    Type *SrcTy = Op0COp->getType();
-
-    // If we are masking off the sign bit of a floating-point value, convert
-    // this to the canonical fabs intrinsic call and cast back to integer.
-    // The backend should know how to optimize fabs().
-    // TODO: This transform should also apply to vectors.
-    ConstantInt *CI;
-    if (isa<BitCastInst>(Op0C) && SrcTy->isFloatingPointTy() &&
-        match(Op1, m_ConstantInt(CI)) && CI->isMaxValue(true)) {
-      Module *M = I.getModule();
-      Function *Fabs = Intrinsic::getDeclaration(M, Intrinsic::fabs, SrcTy);
-      Value *Call = Builder->CreateCall(Fabs, Op0COp, "fabs");
-      return CastInst::CreateBitOrPointerCast(Call, I.getType());
-    }
-  }
-
   if (Instruction *Select = foldBoolSextMaskToSelect(I))
     return Select;
 

Modified: llvm/trunk/test/CodeGen/AArch64/fcvt-int.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/fcvt-int.ll?rev=271573&r1=271572&r2=271573&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/fcvt-int.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/fcvt-int.ll Thu Jun  2 15:01:37 2016
@@ -153,9 +153,7 @@ define double @test_bitcasti64todouble(i
 define double @bitcast_fabs(double %x) {
 ; CHECK-LABEL: bitcast_fabs:
 ; CHECK:       ; BB#0:
-; CHECK-NEXT:    fmov x8, d0
-; CHECK-NEXT:    and x8, x8, #0x7fffffffffffffff
-; CHECK-NEXT:    fmov d0, x8
+; CHECK-NEXT:    fabs d0, d0
 ; CHECK-NEXT:    ret
 ;
   %bc1 = bitcast double %x to i64
@@ -167,9 +165,7 @@ define double @bitcast_fabs(double %x) {
 define float @bitcast_fneg(float %x) {
 ; CHECK-LABEL: bitcast_fneg:
 ; CHECK:       ; BB#0:
-; CHECK-NEXT:    fmov w8, s0
-; CHECK-NEXT:    eor w8, w8, #0x80000000
-; CHECK-NEXT:    fmov s0, w8
+; CHECK-NEXT:    fneg s0, s0
 ; CHECK-NEXT:    ret
 ;
   %bc1 = bitcast float %x to i32

Modified: llvm/trunk/test/Transforms/InstCombine/and2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/and2.ll?rev=271573&r1=271572&r2=271573&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/and2.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/and2.ll Thu Jun  2 15:01:37 2016
@@ -103,45 +103,3 @@ define i64 @test10(i64 %x) {
   ret i64 %add
 }
 
-define i64 @fabs_double(double %x) {
-; CHECK-LABEL: @fabs_double(
-; CHECK-NEXT:  %fabs = call double @llvm.fabs.f64(double %x)
-; CHECK-NEXT:  %and = bitcast double %fabs to i64
-; CHECK-NEXT:  ret i64 %and
-  %bc = bitcast double %x to i64
-  %and = and i64 %bc, 9223372036854775807
-  ret i64 %and
-}
-
-define i64 @fabs_double_swap(double %x) {
-; CHECK-LABEL: @fabs_double_swap(
-; CHECK-NEXT:  %fabs = call double @llvm.fabs.f64(double %x)
-; CHECK-NEXT:  %and = bitcast double %fabs to i64
-; CHECK-NEXT:  ret i64 %and
-  %bc = bitcast double %x to i64
-  %and = and i64 9223372036854775807, %bc
-  ret i64 %and
-}
-
-define i32 @fabs_float(float %x) {
-; CHECK-LABEL: @fabs_float(
-; CHECK-NEXT:  %fabs = call float @llvm.fabs.f32(float %x)
-; CHECK-NEXT:  %and = bitcast float %fabs to i32
-; CHECK-NEXT:  ret i32 %and
-  %bc = bitcast float %x to i32
-  %and = and i32 %bc, 2147483647
-  ret i32 %and
-}
-
-; Make sure that only a bitcast is transformed.
-
-define i64 @fabs_double_not_bitcast(double %x) {
-; CHECK-LABEL: @fabs_double_not_bitcast(
-; CHECK-NEXT:  %bc = fptoui double %x to i64
-; CHECK-NEXT:  %and = and i64 %bc, 9223372036854775807
-; CHECK-NEXT:  ret i64 %and
-  %bc = fptoui double %x to i64
-  %and = and i64 %bc, 9223372036854775807
-  ret i64 %and
-}
-

Modified: llvm/trunk/test/Transforms/InstCombine/fabs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/fabs.ll?rev=271573&r1=271572&r2=271573&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/fabs.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/fabs.ll Thu Jun  2 15:01:37 2016
@@ -41,7 +41,6 @@ define fp128 @square_fabs_call_f128(fp12
 declare float @llvm.fabs.f32(float)
 declare double @llvm.fabs.f64(double)
 declare fp128 @llvm.fabs.f128(fp128)
-declare <4 x float> @llvm.fabs.v4f32(<4 x float>)
 
 define float @square_fabs_intrinsic_f32(float %x) {
   %mul = fmul float %x, %x
@@ -99,27 +98,3 @@ define float @square_fabs_shrink_call2(f
 ; CHECK-NEXT: ret float %sq
 }
 
-; A scalar fabs op makes the sign bit zero, so masking off all of the other bits means we can return zero.
-
-define i32 @fabs_value_tracking_f32(float %x) {
-  %call = call float @llvm.fabs.f32(float %x)
-  %bc = bitcast float %call to i32
-  %and = and i32 %bc, 2147483648
-  ret i32 %and
-
-; CHECK-LABEL: fabs_value_tracking_f32(
-; CHECK:       ret i32 0
-}
-
-; TODO: A vector fabs op makes the sign bits zero, so masking off all of the other bits means we can return zero.
-
-define <4 x i32> @fabs_value_tracking_v4f32(<4 x float> %x) {
-  %call = call <4 x float> @llvm.fabs.v4f32(<4 x float> %x)
-  %bc = bitcast <4 x float> %call to <4 x i32>
-  %and = and <4 x i32> %bc, <i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648>
-  ret <4 x i32> %and
-
-; CHECK-LABEL: fabs_value_tracking_v4f32(
-; CHECK:       ret <4 x i32> %and
-}
-