[llvm-commits] [llvm] r127630 - in /llvm/trunk: lib/Target/ARM/ARMISelDAGToDAG.cpp lib/Target/ARM/ARMISelLowering.cpp lib/Target/ARM/ARMISelLowering.h test/CodeGen/ARM/vext.ll
Bill Wendling
isanbard at gmail.com
Mon Mar 14 16:02:38 PDT 2011
Author: void
Date: Mon Mar 14 18:02:38 2011
New Revision: 127630
URL: http://llvm.org/viewvc/llvm-project?rev=127630&view=rev
Log:
Generate a VTBL instruction instead of a series of loads and stores when we
can. As Nate pointed out, VTBL isn't super performant, but it *has* to be better
than this:
_shuf:
@ BB#0: @ %entry
push {r4, r7, lr}
add r7, sp, #4
sub sp, #12
mov r4, sp
bic r4, r4, #7
mov sp, r4
mov r2, sp
vmov d16, r0, r1
orr r0, r2, #6
orr r3, r2, #7
vst1.8 {d16[0]}, [r3]
vst1.8 {d16[5]}, [r0]
subs r4, r7, #4
orr r0, r2, #5
vst1.8 {d16[4]}, [r0]
orr r0, r2, #4
vst1.8 {d16[4]}, [r0]
orr r0, r2, #3
vst1.8 {d16[0]}, [r0]
orr r0, r2, #2
vst1.8 {d16[2]}, [r0]
orr r0, r2, #1
vst1.8 {d16[1]}, [r0]
vst1.8 {d16[3]}, [r2]
vldr.64 d16, [sp]
vmov r0, r1, d16
mov sp, r4
pop {r4, r7, pc}
The "illegal" testcase in vext.ll is no longer illegal.
<rdar://problem/9078775>
Modified:
llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp
llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
llvm/trunk/lib/Target/ARM/ARMISelLowering.h
llvm/trunk/test/CodeGen/ARM/vext.ll
Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp?rev=127630&r1=127629&r2=127630&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp Mon Mar 14 18:02:38 2011
@@ -2842,6 +2842,35 @@
break;
}
+ case ARMISD::VTBL1: {
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+ SmallVector<SDValue, 6> Ops;
+
+ Ops.push_back(N->getOperand(0));
+ Ops.push_back(N->getOperand(1));
+ Ops.push_back(getAL(CurDAG)); // Predicate
+ Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register
+ return CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops.data(), Ops.size());
+ }
+ case ARMISD::VTBL2: {
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+
+ // Form a REG_SEQUENCE to force register allocation.
+ SDValue V0 = N->getOperand(0);
+ SDValue V1 = N->getOperand(1);
+ SDValue RegSeq = SDValue(PairDRegs(MVT::v16i8, V0, V1), 0);
+
+ SmallVector<SDValue, 6> Ops;
+ Ops.push_back(RegSeq);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(getAL(CurDAG)); // Predicate
+ Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register
+ return CurDAG->getMachineNode(ARM::VTBL2Pseudo, dl, VT,
+ Ops.data(), Ops.size());
+ }
+
case ISD::CONCAT_VECTORS:
return SelectConcatVector(N);
}
Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=127630&r1=127629&r2=127630&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Mon Mar 14 18:02:38 2011
@@ -852,6 +852,10 @@
case ARMISD::VZIP: return "ARMISD::VZIP";
case ARMISD::VUZP: return "ARMISD::VUZP";
case ARMISD::VTRN: return "ARMISD::VTRN";
+ case ARMISD::VTBL1: return "ARMISD::VTBL1";
+ case ARMISD::VTBL2: return "ARMISD::VTBL2";
+ case ARMISD::VTBL3: return "ARMISD::VTBL3";
+ case ARMISD::VTBL4: return "ARMISD::VTBL4";
case ARMISD::VMULLs: return "ARMISD::VMULLs";
case ARMISD::VMULLu: return "ARMISD::VMULLu";
case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
@@ -4055,6 +4059,29 @@
}
}
+static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
+ SmallVectorImpl<int> &ShuffleMask,
+ SelectionDAG &DAG) {
+ // Lower a v8i8 shuffle to ARMISD::VTBL1/VTBL2; both paths return a new node.
+ SDValue V1 = Op.getOperand(0); // first shuffle input
+ SDValue V2 = Op.getOperand(1); // second shuffle input; tested for UNDEF below
+ DebugLoc DL = Op.getDebugLoc();
+
+ SmallVector<SDValue, 8> VTBLMask; // one i32 constant per ShuffleMask entry
+ for (SmallVectorImpl<int>::iterator
+ I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I)
+ VTBLMask.push_back(DAG.getConstant(*I, MVT::i32)); // index copied unchecked
+
+ if (V2.getNode()->getOpcode() == ISD::UNDEF) // only V1 is passed to VTBL1
+ return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1,
+ DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8,
+ &VTBLMask[0], 8)); // reads exactly 8 mask entries
+ else
+ return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2,
+ DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8,
+ &VTBLMask[0], 8)); // reads exactly 8 mask entries
+}
+
static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
@@ -4172,6 +4199,12 @@
return DAG.getNode(ISD::BITCAST, dl, VT, Val);
}
+ if (VT == MVT::v8i8) {
+ SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG);
+ if (NewOp.getNode())
+ return NewOp;
+ }
+
return SDValue();
}
@@ -4534,7 +4567,7 @@
case ISD::GlobalAddress:
return Subtarget->isTargetDarwin() ? LowerGlobalAddressDarwin(Op, DAG) :
LowerGlobalAddressELF(Op, DAG);
- case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
+ case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
case ISD::SELECT: return LowerSELECT(Op, DAG);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
case ISD::BR_CC: return LowerBR_CC(Op, DAG);
Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.h?rev=127630&r1=127629&r2=127630&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.h (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.h Mon Mar 14 18:02:38 2011
@@ -153,6 +153,10 @@
VZIP, // zip (interleave)
VUZP, // unzip (deinterleave)
VTRN, // transpose
+ VTBL1, // 1-register shuffle with mask
+ VTBL2, // 2-register shuffle with mask
+ VTBL3, // 3-register shuffle with mask
+ VTBL4, // 4-register shuffle with mask
// Vector multiply long:
VMULLs, // ...signed
Modified: llvm/trunk/test/CodeGen/ARM/vext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vext.ll?rev=127630&r1=127629&r2=127630&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vext.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vext.ll Mon Mar 14 18:02:38 2011
@@ -121,15 +121,3 @@
%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
ret <4 x i16> %tmp2
}
-
-; The actual shuffle code only handles some cases, make sure we check
-; this rather than blindly emitting a VECTOR_SHUFFLE (infinite
-; lowering loop can result otherwise).
-define <8 x i8> @test_illegal(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-;CHECK: test_illegal:
-;CHECK: vst1.8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
- %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <8 x i32> <i32 0, i32 7, i32 5, i32 25, i32 3, i32 2, i32 2, i32 26>
- ret <8 x i8> %tmp3
-}
More information about the llvm-commits
mailing list