[llvm-branch-commits] [llvm-branch] r96392 - in /llvm/branches/Apple/Hermes: lib/Target/X86/X86ISelLowering.cpp test/CodeGen/X86/lsr-reuse-trunc.ll test/CodeGen/X86/sink-hoist.ll
Evan Cheng
evan.cheng at apple.com
Tue Feb 16 13:17:21 PST 2010
Author: evancheng
Date: Tue Feb 16 15:17:19 2010
New Revision: 96392
URL: http://llvm.org/viewvc/llvm-project?rev=96392&view=rev
Log:
Merge 96389.
Added:
llvm/branches/Apple/Hermes/test/CodeGen/X86/lsr-reuse-trunc.ll
Modified:
llvm/branches/Apple/Hermes/lib/Target/X86/X86ISelLowering.cpp
llvm/branches/Apple/Hermes/test/CodeGen/X86/sink-hoist.ll
Modified: llvm/branches/Apple/Hermes/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Hermes/lib/Target/X86/X86ISelLowering.cpp?rev=96392&r1=96391&r2=96392&view=diff
==============================================================================
--- llvm/branches/Apple/Hermes/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/branches/Apple/Hermes/lib/Target/X86/X86ISelLowering.cpp Tue Feb 16 15:17:19 2010
@@ -989,6 +989,7 @@
setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
setTargetDAGCombine(ISD::BUILD_VECTOR);
setTargetDAGCombine(ISD::SELECT);
+ setTargetDAGCombine(ISD::AND);
setTargetDAGCombine(ISD::SHL);
setTargetDAGCombine(ISD::SRA);
setTargetDAGCombine(ISD::SRL);
@@ -9152,6 +9153,53 @@
return SDValue();
}
+/// PerformANDCombine - Look for SSE AND instructions of this form:
+/// (and x, (build_vector c1,c2,c3,c4)). If there exists a use of a build_vector
+/// that's the bitwise complement of the mask, then transform the node to
+/// (and (xor x, (build_vector -1,-1,-1,-1)), (build_vector ~c1,~c2,~c3,~c4)).
+static SDValue PerformANDCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ EVT VT = N->getValueType(0);
+ if (!VT.isVector() || !VT.isInteger())
+ return SDValue();
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ if (N0.getOpcode() == ISD::XOR || !N1.hasOneUse())
+ return SDValue();
+
+ if (N1.getOpcode() == ISD::BUILD_VECTOR) {
+ unsigned NumElts = VT.getVectorNumElements();
+ EVT EltVT = VT.getVectorElementType();
+ SmallVector<SDValue, 8> Mask;
+ Mask.reserve(NumElts);
+ for (unsigned i = 0; i != NumElts; ++i) {
+ SDValue Arg = N1.getOperand(i);
+ if (Arg.getOpcode() == ISD::UNDEF) {
+ Mask.push_back(Arg);
+ continue;
+ }
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Arg);
+ if (!C) return SDValue();
+ Mask.push_back(DAG.getConstant(~C->getAPIntValue(), EltVT));
+ }
+ N1 = DAG.getNode(ISD::BUILD_VECTOR, N1.getDebugLoc(), VT,
+ &Mask[0], NumElts);
+ if (!N1.use_empty()) {
+ unsigned Bits = EltVT.getSizeInBits();
+ Mask.clear();
+ for (unsigned i = 0; i != NumElts; ++i)
+ Mask.push_back(DAG.getConstant(APInt::getAllOnesValue(Bits), EltVT));
+ SDValue NewMask = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
+ VT, &Mask[0], NumElts);
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+ DAG.getNode(ISD::XOR, N->getDebugLoc(), VT,
+ N0, NewMask), N1);
+ }
+ }
+
+ return SDValue();
+}
/// PerformMulCombine - Optimize a single multiply with constant into two
/// in order to implement it with two cheaper instructions, e.g.
@@ -9300,7 +9348,7 @@
}
} else if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT) {
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(InVec.getOperand(2))) {
- unsigned SplatIdx = cast<ShuffleVectorSDNode>(ShAmtOp)->getSplatIndex();
+ unsigned SplatIdx= cast<ShuffleVectorSDNode>(ShAmtOp)->getSplatIndex();
if (C->getZExtValue() == SplatIdx)
BaseShAmt = InVec.getOperand(1);
}
@@ -9681,6 +9729,7 @@
case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, *this);
case ISD::SELECT: return PerformSELECTCombine(N, DAG, Subtarget);
case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI);
+ case ISD::AND: return PerformANDCombine(N, DAG, DCI);
case ISD::MUL: return PerformMulCombine(N, DAG, DCI);
case ISD::SHL:
case ISD::SRA:
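For reference, the instruction-level pattern this combine is after can be
sketched with SSE intrinsics. This is an illustrative sketch only, not code
from the commit: andnps computes (~a & b), so a mask and its bitwise
complement can both be produced from a single constant-pool entry.

  // Illustrative sketch (C++ with SSE2 intrinsics), not part of this commit.
  // _mm_andnot_ps(a, b) computes (~a & b), so the complemented mask never
  // has to be materialized as a second constant.
  #include <emmintrin.h>

  // x & 0x7fffffff per lane (clears the sign bit): plain andps.
  static inline __m128 clear_sign(__m128 x) {
    const __m128 m = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff));
    return _mm_and_ps(m, x);
  }

  // x & 0x80000000 per lane (keeps only the sign bit): andnps reuses the
  // very same 0x7fffffff constant, since ~0x7fffffff == 0x80000000.
  static inline __m128 keep_sign(__m128 x) {
    const __m128 m = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff));
    return _mm_andnot_ps(m, x);
  }

With this pairing the backend can emit one movaps from the constant pool and
feed both the andps and the andnps, which is what the new test below checks.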
Added: llvm/branches/Apple/Hermes/test/CodeGen/X86/lsr-reuse-trunc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Hermes/test/CodeGen/X86/lsr-reuse-trunc.ll?rev=96392&view=auto
==============================================================================
--- llvm/branches/Apple/Hermes/test/CodeGen/X86/lsr-reuse-trunc.ll (added)
+++ llvm/branches/Apple/Hermes/test/CodeGen/X86/lsr-reuse-trunc.ll Tue Feb 16 15:17:19 2010
@@ -0,0 +1,69 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+
+; Full strength reduction wouldn't reduce register pressure, so LSR should
+; stick with indexing here.
+
+; Also checks that andps and andnps share the same constant pool. Previously
+; llvm would codegen two andps, one using 0x80000000, the other 0x7fffffff.
+; rdar://7323335
+
+; CHECK: movaps LCPI1_0
+; CHECK: movaps LCPI1_1
+; CHECK-NOT: movaps LCPI1_2
+; CHECK: movaps (%rsi,%rcx), %xmm2
+; CHECK: andps
+; CHECK: andnps
+; CHECK: movaps %xmm2, (%rdi,%rcx)
+; CHECK: addq $16, %rcx
+; CHECK: addl $4, %eax
+; CHECK: cmpl %eax, (%rdx)
+; CHECK-NEXT: jg
+
+define void @vvfloorf(float* nocapture %y, float* nocapture %x, i32* nocapture %n) nounwind {
+entry:
+ %0 = load i32* %n, align 4
+ %1 = icmp sgt i32 %0, 0
+ br i1 %1, label %bb, label %return
+
+bb:
+ %indvar = phi i64 [ %indvar.next, %bb ], [ 0, %entry ]
+ %tmp = shl i64 %indvar, 2
+ %scevgep = getelementptr float* %y, i64 %tmp
+ %scevgep9 = bitcast float* %scevgep to <4 x float>*
+ %scevgep10 = getelementptr float* %x, i64 %tmp
+ %scevgep1011 = bitcast float* %scevgep10 to <4 x float>*
+ %2 = load <4 x float>* %scevgep1011, align 16
+ %3 = bitcast <4 x float> %2 to <4 x i32>
+ %4 = and <4 x i32> %3, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
+ %5 = bitcast <4 x i32> %4 to <4 x float>
+ %6 = and <4 x i32> %3, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
+ %7 = tail call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %5, <4 x float> <float 8.388608e+06, float 8.388608e+06, float 8.388608e+06, float 8.388608e+06>, i8 5) nounwind
+ %tmp.i4 = bitcast <4 x float> %7 to <4 x i32>
+ %8 = xor <4 x i32> %tmp.i4, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %9 = and <4 x i32> %8, <i32 1258291200, i32 1258291200, i32 1258291200, i32 1258291200>
+ %10 = or <4 x i32> %9, %6
+ %11 = bitcast <4 x i32> %10 to <4 x float>
+ %12 = fadd <4 x float> %2, %11
+ %13 = fsub <4 x float> %12, %11
+ %14 = tail call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %2, <4 x float> %13, i8 1) nounwind
+ %15 = bitcast <4 x float> %14 to <4 x i32>
+ %16 = tail call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %15) nounwind readnone
+ %17 = fadd <4 x float> %13, %16
+ %tmp.i = bitcast <4 x float> %17 to <4 x i32>
+ %18 = or <4 x i32> %tmp.i, %6
+ %19 = bitcast <4 x i32> %18 to <4 x float>
+ store <4 x float> %19, <4 x float>* %scevgep9, align 16
+ %tmp12 = add i64 %tmp, 4
+ %tmp13 = trunc i64 %tmp12 to i32
+ %20 = load i32* %n, align 4
+ %21 = icmp sgt i32 %20, %tmp13
+ %indvar.next = add i64 %indvar, 1
+ br i1 %21, label %bb, label %return
+
+return:
+ ret void
+}
+
+declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone
+
+declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone
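A note on the first block of CHECK lines: besides the constant-pool sharing,
they pin the addressing mode. Both the load and the store go through a single
scaled index register ((%rsi,%rcx) and (%rdi,%rcx), advanced by addq $16)
rather than separately incremented pointers. As a rough illustration
(hypothetical C++, not derived from the commit), LSR is expected to keep the
first loop shape below rather than rewrite it into the second:

  // Hypothetical illustration, not part of the commit: the two loop shapes
  // LSR chooses between in a case like the test above.

  // Indexed form: a single induction variable; the x86 addressing mode
  // folds the base+index computation, as in "movaps (%rsi,%rcx), %xmm2".
  void indexed(float *y, const float *x, long n) {
    for (long i = 0; i < n; ++i)
      y[i] = x[i] * 2.0f;        // stand-in for the real loop body
  }

  // Fully strength-reduced form: two live pointers instead of one shared
  // index -- higher register pressure for no less work, so LSR avoids it.
  void reduced(float *y, const float *x, long n) {
    for (const float *e = x + n; x != e; ++x, ++y)
      *y = *x * 2.0f;            // stand-in for the real loop body
  }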
Modified: llvm/branches/Apple/Hermes/test/CodeGen/X86/sink-hoist.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Hermes/test/CodeGen/X86/sink-hoist.ll?rev=96392&r1=96391&r2=96392&view=diff
==============================================================================
--- llvm/branches/Apple/Hermes/test/CodeGen/X86/sink-hoist.ll (original)
+++ llvm/branches/Apple/Hermes/test/CodeGen/X86/sink-hoist.ll Tue Feb 16 15:17:19 2010
@@ -63,7 +63,6 @@
; CHECK: vv:
; CHECK: LCPI4_0(%rip), %xmm0
; CHECK: LCPI4_1(%rip), %xmm1
-; CHECK: LCPI4_2(%rip), %xmm2
; CHECK: align
; CHECK-NOT: LCPI
; CHECK: ret
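The dropped CHECK line is the same improvement showing up in vv: with the new
combine, one mask is derived from the other's constant via andnps, so the loop
setup needs one fewer constant-pool load and the LCPI4_2 reference disappears.
The identity behind the sharing is just the 32-bit complement (a hypothetical
self-check, not from the commit):

  static_assert(~0x7fffffffu == 0x80000000u,
                "sign mask is the complement of the abs mask");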