[llvm-commits] [llvm] r104415 - in /llvm/trunk: lib/Target/ARM/ARMISelLowering.cpp test/CodeGen/ARM/2010-05-21-BuildVector.ll test/CodeGen/ARM/spill-q.ll test/CodeGen/Thumb2/thumb2-spill-q.ll
Bob Wilson
bob.wilson at apple.com
Fri May 21 17:23:12 PDT 2010
Author: bwilson
Date: Fri May 21 19:23:12 2010
New Revision: 104415
URL: http://llvm.org/viewvc/llvm-project?rev=104415&view=rev
Log:
Recognize more BUILD_VECTORs and VECTOR_SHUFFLEs that can be implemented by
copying VFP subregs. This exposed a bunch of dead code in the *spill-q.ll
tests, so I tweaked those tests to keep that code from being optimized away.
Radar 7872877.
Added:
llvm/trunk/test/CodeGen/ARM/2010-05-21-BuildVector.ll
Modified:
llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
llvm/trunk/test/CodeGen/ARM/spill-q.ll
llvm/trunk/test/CodeGen/Thumb2/thumb2-spill-q.ll
Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=104415&r1=104414&r2=104415&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Fri May 21 19:23:12 2010
@@ -2810,21 +2810,60 @@
}
}
- // If there are only 2 elements in a 128-bit vector, insert them into an
- // undef vector. This handles the common case for 128-bit vector argument
- // passing, where the insertions should be translated to subreg accesses
- // with no real instructions.
- if (VT.is128BitVector() && Op.getNumOperands() == 2) {
- SDValue Val = DAG.getUNDEF(VT);
- SDValue Op0 = Op.getOperand(0);
- SDValue Op1 = Op.getOperand(1);
- if (Op0.getOpcode() != ISD::UNDEF)
- Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Val, Op0,
- DAG.getIntPtrConstant(0));
- if (Op1.getOpcode() != ISD::UNDEF)
- Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Val, Op1,
- DAG.getIntPtrConstant(1));
- return Val;
+ // Scan through the operands to see if only one value is used.
+ unsigned NumElts = VT.getVectorNumElements();
+ bool isOnlyLowElement = true;
+ bool usesOnlyOneValue = true;
+ bool isConstant = true;
+ SDValue Value;
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue V = Op.getOperand(i);
+ if (V.getOpcode() == ISD::UNDEF)
+ continue;
+ if (i > 0)
+ isOnlyLowElement = false;
+ if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
+ isConstant = false;
+
+ if (!Value.getNode())
+ Value = V;
+ else if (V != Value)
+ usesOnlyOneValue = false;
+ }
+
+ if (!Value.getNode())
+ return DAG.getUNDEF(VT);
+
+ if (isOnlyLowElement)
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
+
+ // If all elements are constants, fall back to the default expansion, which
+ // will generate a load from the constant pool.
+ if (isConstant)
+ return SDValue();
+
+ // Use VDUP for non-constant splats.
+ if (usesOnlyOneValue)
+ return DAG.getNode(ARMISD::VDUP, dl, VT, Value);
+
+ // Vectors with 32- or 64-bit elements can be built by directly assigning
+ // the subregisters.
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ if (EltSize >= 32) {
+ // Do the expansion with floating-point types, since that is what the VFP
+ // registers are defined to use, and since i64 is not legal.
+ EVT EltVT = EVT::getFloatingPointVT(EltSize);
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
+ SDValue Val = DAG.getUNDEF(VecVT);
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue Elt = Op.getOperand(i);
+ if (Elt.getOpcode() == ISD::UNDEF)
+ continue;
+ Elt = DAG.getNode(ISD::BIT_CONVERT, dl, EltVT, Elt);
+ Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Val, Elt,
+ DAG.getConstant(i, MVT::i32));
+ }
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Val);
}
return SDValue();
@@ -3014,8 +3053,8 @@
// If the shuffle is not directly supported and it has 4 elements, use
// the PerfectShuffle-generated table to synthesize it from other shuffles.
- if (VT.getVectorNumElements() == 4 &&
- (VT.is128BitVector() || VT.is64BitVector())) {
+ unsigned NumElts = VT.getVectorNumElements();
+ if (NumElts == 4) {
unsigned PFIndexes[4];
for (unsigned i = 0; i != 4; ++i) {
if (ShuffleMask[i] < 0)
@@ -3027,7 +3066,6 @@
// Compute the index in the perfect shuffle table.
unsigned PFTableIndex =
PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
-
unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
unsigned Cost = (PFEntry >> 30);
@@ -3035,19 +3073,24 @@
return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
}
- // v2f64 and v2i64 shuffles are just register copies.
- if (VT == MVT::v2f64 || VT == MVT::v2i64) {
- // Do the expansion as f64 since i64 is not legal.
- V1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, V1);
- V2 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, V2);
- SDValue Val = DAG.getUNDEF(MVT::v2f64);
- for (unsigned i = 0; i < 2; ++i) {
+ // Implement shuffles with 32- or 64-bit elements as subreg copies.
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ if (EltSize >= 32) {
+ // Do the expansion with floating-point types, since that is what the VFP
+ // registers are defined to use, and since i64 is not legal.
+ EVT EltVT = EVT::getFloatingPointVT(EltSize);
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
+ V1 = DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, V1);
+ V2 = DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, V2);
+ SDValue Val = DAG.getUNDEF(VecVT);
+ for (unsigned i = 0; i < NumElts; ++i) {
if (ShuffleMask[i] < 0)
continue;
- SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
- ShuffleMask[i] < 2 ? V1 : V2,
- DAG.getConstant(ShuffleMask[i] & 1, MVT::i32));
- Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
+ SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
+ ShuffleMask[i] < (int)NumElts ? V1 : V2,
+ DAG.getConstant(ShuffleMask[i] & (NumElts-1),
+ MVT::i32));
+ Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Val,
Elt, DAG.getConstant(i, MVT::i32));
}
return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Val);
Added: llvm/trunk/test/CodeGen/ARM/2010-05-21-BuildVector.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/2010-05-21-BuildVector.ll?rev=104415&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/2010-05-21-BuildVector.ll (added)
+++ llvm/trunk/test/CodeGen/ARM/2010-05-21-BuildVector.ll Fri May 21 19:23:12 2010
@@ -0,0 +1,43 @@
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
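+; Radar 7872877: a <4 x float> built from four scalar float loads should load each element directly into an S register (vldr.32 s*).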
+
+define arm_apcscc void @test(float* %fltp, i32 %packedValue, float* %table) nounwind {
+entry:
+ %0 = load float* %fltp
+ %1 = insertelement <4 x float> undef, float %0, i32 0
+ %2 = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> zeroinitializer
+ %3 = shl i32 %packedValue, 16
+ %4 = ashr i32 %3, 30
+ %.sum = add i32 %4, 4
+ %5 = getelementptr inbounds float* %table, i32 %.sum
+;CHECK: vldr.32 s
+ %6 = load float* %5, align 4
+ %tmp11 = insertelement <4 x float> undef, float %6, i32 0
+ %7 = shl i32 %packedValue, 18
+ %8 = ashr i32 %7, 30
+ %.sum12 = add i32 %8, 4
+ %9 = getelementptr inbounds float* %table, i32 %.sum12
+;CHECK: vldr.32 s
+ %10 = load float* %9, align 4
+ %tmp9 = insertelement <4 x float> %tmp11, float %10, i32 1
+ %11 = shl i32 %packedValue, 20
+ %12 = ashr i32 %11, 30
+ %.sum13 = add i32 %12, 4
+ %13 = getelementptr inbounds float* %table, i32 %.sum13
+;CHECK: vldr.32 s
+ %14 = load float* %13, align 4
+ %tmp7 = insertelement <4 x float> %tmp9, float %14, i32 2
+ %15 = shl i32 %packedValue, 22
+ %16 = ashr i32 %15, 30
+ %.sum14 = add i32 %16, 4
+ %17 = getelementptr inbounds float* %table, i32 %.sum14
+;CHECK: vldr.32 s
+ %18 = load float* %17, align 4
+ %tmp5 = insertelement <4 x float> %tmp7, float %18, i32 3
+ %19 = fmul <4 x float> %tmp5, %2
+ %20 = bitcast float* %fltp to i8*
+ tail call void @llvm.arm.neon.vst1.v4f32(i8* %20, <4 x float> %19)
+ ret void
+}
+
+declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>) nounwind
Modified: llvm/trunk/test/CodeGen/ARM/spill-q.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/spill-q.ll?rev=104415&r1=104414&r2=104415&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/spill-q.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/spill-q.ll Fri May 21 19:23:12 2010
@@ -46,7 +46,8 @@
%20 = shufflevector <2 x float> %19, <2 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
%21 = fadd <4 x float> zeroinitializer, %20 ; <<4 x float>> [#uses=2]
%22 = fcmp ogt <4 x float> %besterror.0.2264, %21 ; <<4 x i1>> [#uses=0]
- br i1 undef, label %bb193, label %bb186
+ %tmp = extractelement <4 x i1> %22, i32 0
+ br i1 %tmp, label %bb193, label %bb186
bb186: ; preds = %bb4
br label %bb193
Modified: llvm/trunk/test/CodeGen/Thumb2/thumb2-spill-q.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/thumb2-spill-q.ll?rev=104415&r1=104414&r2=104415&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb2/thumb2-spill-q.ll (original)
+++ llvm/trunk/test/CodeGen/Thumb2/thumb2-spill-q.ll Fri May 21 19:23:12 2010
@@ -46,7 +46,8 @@
%20 = shufflevector <2 x float> %19, <2 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
%21 = fadd <4 x float> zeroinitializer, %20 ; <<4 x float>> [#uses=2]
%22 = fcmp ogt <4 x float> %besterror.0.2264, %21 ; <<4 x i1>> [#uses=0]
- br i1 undef, label %bb193, label %bb186
+ %tmp = extractelement <4 x i1> %22, i32 0
+ br i1 %tmp, label %bb193, label %bb186
bb186: ; preds = %bb4
br label %bb193
More information about the llvm-commits mailing list