[llvm-commits] [llvm] r149692 - in /llvm/trunk: lib/CodeGen/SelectionDAG/DAGCombiner.cpp test/CodeGen/X86/2011-12-8-bitcastintprom.ll test/CodeGen/X86/avx-shuffle.ll
Nadav Rotem
nadav.rotem at intel.com
Fri Feb 3 05:18:25 PST 2012
Author: nadav
Date: Fri Feb 3 07:18:25 2012
New Revision: 149692
URL: http://llvm.org/viewvc/llvm-project?rev=149692&view=rev
Log:
The type-legalizer often scalarizes code. One of the common patterns it produces is extract-and-truncate, i.e. trunc(extract(x)).
In this patch we optimize this pattern by converting the sequence into a single extract of a narrower element type from a bitcast of the source vector, i.e. extract(bitcast(x)).
This allows the BUILD_VECTOR DAG optimizations to construct efficient shuffle operations in many cases.
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/trunk/test/CodeGen/X86/2011-12-8-bitcastintprom.ll
llvm/trunk/test/CodeGen/X86/avx-shuffle.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=149692&r1=149691&r2=149692&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Fri Feb 3 07:18:25 2012
@@ -4957,6 +4957,7 @@
SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ bool isLE = TLI.isLittleEndian();
// noop truncate
if (N0.getValueType() == N->getValueType(0))
@@ -4984,6 +4985,39 @@
return N0.getOperand(0);
}
+ // Fold Extract-and-trunc into a narrow extract:
+ // trunc(extract(x)) -> extract(bitcast(x))
+ // We only run this optimization after type legalization (which often
+ // creates this pattern) and before operation legalization after which
+ // we need to be more careful about the vector instructions that we generate.
+ if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ LegalTypes && !LegalOperations && N0->hasOneUse()) {
+
+ EVT VecTy = N0.getOperand(0).getValueType();
+ EVT ExTy = N0.getValueType();
+ EVT TrTy = N->getValueType(0);
+
+ unsigned NumElem = VecTy.getVectorNumElements();
+ unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
+
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
+ assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
+
+ SDValue EltNo = N0->getOperand(1);
+ if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
+ int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+
+ int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
+
+ SDValue V = DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
+ NVT, N0.getOperand(0));
+
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+ N->getDebugLoc(), TrTy, V,
+ DAG.getConstant(Index, MVT::i32));
+ }
+ }
+
// See if we can simplify the input to this truncate through knowledge that
// only the low bits are being used.
// For example "trunc (or (shl x, 8), y)" // -> trunc y
Modified: llvm/trunk/test/CodeGen/X86/2011-12-8-bitcastintprom.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2011-12-8-bitcastintprom.ll?rev=149692&r1=149691&r2=149692&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/2011-12-8-bitcastintprom.ll (original)
+++ llvm/trunk/test/CodeGen/X86/2011-12-8-bitcastintprom.ll Fri Feb 3 07:18:25 2012
@@ -2,8 +2,8 @@
; Make sure that the conversion between v4i8 to v2i16 is not a simple bitcast.
; CHECK: prom_bug
-; CHECK: movd
; CHECK: shufb
+; CHECK: movd
; CHECK: movw
; CHECK: ret
define void @prom_bug(<4 x i8> %t, i16* %p) {
Modified: llvm/trunk/test/CodeGen/X86/avx-shuffle.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-shuffle.ll?rev=149692&r1=149691&r2=149692&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-shuffle.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-shuffle.ll Fri Feb 3 07:18:25 2012
@@ -109,3 +109,11 @@
%tmp1 = shufflevector <4 x float> %tmp0, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
ret <4 x float> %tmp1
}
+
+;CHECK: test13
+;CHECK: shufd
+;CHECK: ret
+define <4 x i32> @test13(<2 x i32>%x) nounwind readnone {
+ %x1 = shufflevector <2 x i32> %x, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+ ret <4 x i32>%x1
+}
More information about the llvm-commits mailing list