[llvm] r175670 - DAGCombiner: Fold pointless truncate, bitcast, buildvector series
Arnold Schwaighofer
aschwaighofer at apple.com
Wed Feb 20 13:33:32 PST 2013
Author: arnolds
Date: Wed Feb 20 15:33:32 2013
New Revision: 175670
URL: http://llvm.org/viewvc/llvm-project?rev=175670&view=rev
Log:
DAGCombiner: Fold pointless truncate, bitcast, buildvector series
(2xi32) (truncate ((2xi64) bitcast (buildvector i32 a, i32 x, i32 b, i32 y)))
can be folded into a (2xi32) (buildvector i32 a, i32 b).
Such a DAG would cause uneccessary vdup instructions followed by vmovn
instructions.
We generate this code on ARM NEON for a setcc olt, 2xf64, 2xf64. For example, in
the vectorized version of the code below.
double A[N];
double B[N];
void test_double_compare_to_double() {
int i;
for(i=0;i<N;i++)
A[i] = (double)(A[i] < B[i]);
}
radar://13191881
Fixes bug 15283.
Added:
llvm/trunk/test/CodeGen/ARM/neon_cmp.ll
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=175670&r1=175669&r2=175670&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Wed Feb 20 15:33:32 2013
@@ -5361,6 +5361,38 @@ SDValue DAGCombiner::visitTRUNCATE(SDNod
}
}
+ // Fold a series of buildvector, bitcast, and truncate if possible.
+ // For example fold
+ // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
+ // (2xi32 (buildvector x, y)).
+ if (Level == AfterLegalizeVectorOps && VT.isVector() &&
+ N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
+ N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
+ N0.getOperand(0).hasOneUse()) {
+
+ SDValue BuildVect = N0.getOperand(0);
+ EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
+ EVT TruncVecEltTy = VT.getVectorElementType();
+
+ // Check that the element types match.
+ if (BuildVectEltTy == TruncVecEltTy) {
+ // Now we only need to compute the offset of the truncated elements.
+ unsigned BuildVecNumElts = BuildVect.getNumOperands();
+ unsigned TruncVecNumElts = VT.getVectorNumElements();
+ unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
+
+ assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
+ "Invalid number of elements");
+
+ SmallVector<SDValue, 8> Opnds;
+ for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
+ Opnds.push_back(BuildVect.getOperand(i));
+
+ return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, &Opnds[0],
+ Opnds.size());
+ }
+ }
+
// See if we can simplify the input to this truncate through knowledge that
// only the low bits are being used.
// For example "trunc (or (shl x, 8), y)" // -> trunc y
Added: llvm/trunk/test/CodeGen/ARM/neon_cmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/neon_cmp.ll?rev=175670&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/neon_cmp.ll (added)
+++ llvm/trunk/test/CodeGen/ARM/neon_cmp.ll Wed Feb 20 15:33:32 2013
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s
+; bug 15283
+; radar://13191881
+; CHECK: vfcmp
+define void @vfcmp(<2 x double>* %a, <2 x double>* %b) {
+ %wide.load = load <2 x double>* %a, align 4
+ %wide.load2 = load <2 x double>* %b, align 4
+; CHECK-NOT: vdup.32
+; CHECK-NOT: vmovn.i64
+ %v1 = fcmp olt <2 x double> %wide.load, %wide.load2
+ %v2 = zext <2 x i1> %v1 to <2 x i32>
+ %v3 = sitofp <2 x i32> %v2 to <2 x double>
+ store <2 x double> %v3, <2 x double>* %b, align 4
+ ret void
+}
More information about the llvm-commits
mailing list