[llvm] r213341 - AArch64: Constant fold converting vector setcc results to float.

Thu Jul 17 17:40:53 PDT 2014

Author: grosbach
Date: Thu Jul 17 19:40:52 2014
New Revision: 213341

URL: http://llvm.org/viewvc/llvm-project?rev=213341&view=rev
Log:
AArch64: Constant fold converting vector setcc results to float.

Since the result of a SETCC for AArch64 is 0 or -1 in each lane, we can
move unary operations, in this case [su]int_to_fp through the mask
operation and constant fold the operation away. Generally speaking:
  UNARYOP(AND(VECTOR_CMP(x,y), constant))
      --> AND(VECTOR_CMP(x,y), constant2)
where constant2 is UNARYOP(constant).

This implements the transform where UNARYOP is [su]int_to_fp.

For example, consider the simple function:
define <4 x float> @foo(<4 x float> %val, <4 x float> %test) nounwind {
  %cmp = fcmp oeq <4 x float> %val, %test
  %ext = zext <4 x i1> %cmp to <4 x i32>
  %result = sitofp <4 x i32> %ext to <4 x float>
  ret <4 x float> %result
}

Before this change, the code is generated as:
  fcmeq.4s  v0, v0, v1
  movi.4s v1, #0x1        // Integer splat value.
  and.16b v0, v0, v1      // Mask lanes based on the comparison.
  scvtf.4s  v0, v0        // Convert each lane to f32.
  ret

After, the code is improved to:
  fcmeq.4s  v0, v0, v1
  fmov.4s v1, #1.00000000 // f32 splat value.
  and.16b v0, v0, v1      // Mask lanes based on the comparison.
  ret

The svvtf.4s has been constant folded away and the floating point 1.0f
vector lanes are materialized directly via fmov.4s.

Rather than do the folding manually in the target code, teach getNode()
in the generic SelectionDAG to handle folding constant operands of
vector [su]int_to_fp nodes. It is reasonable (as noted in a FIXME) to do
additional constant folding there as well, but I don't have test cases
for those operations, so leaving them for another time when it becomes
appropriate.

rdar://17693791

Added:
    llvm/trunk/test/CodeGen/AArch64/arm64-setcc-int-to-fp-combine.ll
Modified:
    llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
    llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp?rev=213341&r1=213340&r2=213341&view=diff
==============================================================================

--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Thu Jul 17 19:40:52 2014
@@ -2772,6 +2772,32 @@ SDValue SelectionDAG::getNode(unsigned O
     }
   }
 
+  // Constant fold unary operations with a vector integer operand.
+  if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Operand.getNode())) {
+    APInt Val;
+    APInt DummyUndefs;
+    unsigned SplatBitSize;
+    bool DummyHasUndefs;
+    if (BV->isConstantSplat(Val, DummyUndefs, SplatBitSize, DummyHasUndefs)) {
+      switch (Opcode) {
+      default:
+        // FIXME: Entirely reasonable to perform folding of other unary
+        // operations here as the need arises.
+        break;
+      case ISD::UINT_TO_FP:
+      case ISD::SINT_TO_FP: {
+        APFloat APF(
+            EVTToAPFloatSemantics(VT.getVectorElementType()),
+            APInt::getNullValue(VT.getVectorElementType().getSizeInBits()));
+        (void)APF.convertFromAPInt(Val, Opcode == ISD::SINT_TO_FP,
+                                   APFloat::rmNearestTiesToEven);
+
+        return getConstantFP(APF, VT);
+      }
+      }
+    }
+  }
+
   unsigned OpOpcode = Operand.getNode()->getOpcode();
   switch (Opcode) {
   case ISD::TokenFactor:

Modified: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp?rev=213341&r1=213340&r2=213341&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp Thu Jul 17 19:40:52 2014
@@ -6417,10 +6417,61 @@ static SDValue performMulCombine(SDNode
   return SDValue();
 }
 
+static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N,
+                                                         SelectionDAG &DAG) {
+  // Take advantage of vector comparisons producing 0 or -1 in each lane to
+  // optimize away operation when it's from a constant.
+  //
+  // The general transformation is:
+  //    UNARYOP(AND(VECTOR_CMP(x,y), constant)) -->
+  //       AND(VECTOR_CMP(x,y), constant2)
+  //    constant2 = UNARYOP(constant)
+
+  // Early exit if this isn't a vector operation or if the operand of the
+  // unary operation isn't a bitwise AND.
+  EVT VT = N->getValueType(0);
+  if (!VT.isVector() || N->getOperand(0)->getOpcode() != ISD::AND ||
+      N->getOperand(0)->getOperand(0)->getOpcode() != ISD::SETCC)
+    return SDValue();
+
+  // Now check that the other operand of the AND is a constant splat. We could
+  // make the transformation for non-constant splats as well, but it's unclear
+  // that would be a benefit as it would not eliminate any operations, just
+  // perform one more step in scalar code before moving to the vector unit.
+  if (BuildVectorSDNode *BV =
+          dyn_cast<BuildVectorSDNode>(N->getOperand(0)->getOperand(1))) {
+    // Bail out if the vector isn't a constant splat.
+    if (!BV->getConstantSplatNode())
+      return SDValue();
+
+    // Everything checks out. Build up the new and improved node.
+    SDLoc DL(N);
+    EVT IntVT = BV->getValueType(0);
+    // Create a new constant of the appropriate type for the transformed
+    // DAG.
+    SDValue SourceConst = DAG.getNode(N->getOpcode(), DL, VT, SDValue(BV, 0));
+    // The AND node needs bitcasts to/from an integer vector type around it.
+    SDValue MaskConst = DAG.getNode(ISD::BITCAST, DL, IntVT, SourceConst);
+    SDValue NewAnd = DAG.getNode(ISD::AND, DL, IntVT,
+                                 N->getOperand(0)->getOperand(0), MaskConst);
+    SDValue Res = DAG.getNode(ISD::BITCAST, DL, VT, NewAnd);
+    return Res;
+  }
+
+  return SDValue();
+}
+
 static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG) {
+  // First try to optimize away the conversion when it's conditionally from
+  // a constant. Vectors only.
+  SDValue Res = performVectorCompareAndMaskUnaryOpCombine(N, DAG);
+  if (Res != SDValue())
+    return Res;
+
   EVT VT = N->getValueType(0);
   if (VT != MVT::f32 && VT != MVT::f64)
     return SDValue();
+
   // Only optimize when the source and destination types have the same width.
   if (VT.getSizeInBits() != N->getOperand(0).getValueType().getSizeInBits())
     return SDValue();

Added: llvm/trunk/test/CodeGen/AArch64/arm64-setcc-int-to-fp-combine.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-setcc-int-to-fp-combine.ll?rev=213341&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-setcc-int-to-fp-combine.ll (added)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-setcc-int-to-fp-combine.ll Thu Jul 17 19:40:52 2014
@@ -0,0 +1,13 @@
+; RUN: llc < %s -asm-verbose=false -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+
+define <4 x float> @foo(<4 x float> %val, <4 x float> %test) nounwind {
+; CHECK-LABEL: foo:
+; CHECK-NEXT: fcmeq.4s  v0, v0, v1
+; CHECK-NEXT: fmov.4s v1, #1.00000000
+; CHECK-NEXT: and.16b v0, v0, v1
+; CHECK-NEXT: ret
+  %cmp = fcmp oeq <4 x float> %val, %test
+  %ext = zext <4 x i1> %cmp to <4 x i32>
+  %result = sitofp <4 x i32> %ext to <4 x float>
+  ret <4 x float> %result
+}