[llvm] r222635 - [X86] Fixes bug in build_vector v4x32 lowering
Michael Kuperstein
michael.m.kuperstein at intel.com
Sun Nov 23 05:09:08 PST 2014
Author: mkuper
Date: Sun Nov 23 07:09:06 2014
New Revision: 222635
URL: http://llvm.org/viewvc/llvm-project?rev=222635&view=rev
Log:
[X86] Fixes bug in build_vector v4x32 lowering
r222375 made some improvements to build_vector lowering of v4x32 and v4xf32 into an insertps, but it missed a case where:
1. A single extracted element is used twice.
2. The lower of the two non-zero indexes should be preserved, and the higher should be used for the dest mask.
This caused a crash, since the source value for the insertps ends up uninitialized.
Differential Revision: http://reviews.llvm.org/D6377
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/sse41.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=222635&r1=222634&r2=222635&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sun Nov 23 07:09:06 2014
@@ -5771,7 +5771,8 @@ static SDValue LowerBuildVectorv4x32(SDV
// We only know how to deal with build_vector nodes where elements are either
// zeroable or extract_vector_elt with constant index.
SDValue FirstNonZero;
- for (int i=0; i < 4; ++i) {
+ unsigned FirstNonZeroIdx;
+ for (unsigned i=0; i < 4; ++i) {
if (Zeroable[i])
continue;
SDValue Elt = Op->getOperand(i);
@@ -5782,8 +5783,10 @@ static SDValue LowerBuildVectorv4x32(SDV
MVT VT = Elt.getOperand(0).getSimpleValueType();
if (!VT.is128BitVector())
return SDValue();
- if (!FirstNonZero.getNode())
+ if (!FirstNonZero.getNode()) {
FirstNonZero = Elt;
+ FirstNonZeroIdx = i;
+ }
}
assert(FirstNonZero.getNode() && "Unexpected build vector of all zeros!");
@@ -5822,7 +5825,7 @@ static SDValue LowerBuildVectorv4x32(SDV
return SDValue();
SDValue V2 = Elt.getOperand(0);
- if (Elt == FirstNonZero)
+ if (Elt == FirstNonZero && EltIdx == FirstNonZeroIdx)
V1 = SDValue();
bool CanFold = true;
Modified: llvm/trunk/test/CodeGen/X86/sse41.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse41.ll?rev=222635&r1=222634&r2=222635&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse41.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse41.ll Sun Nov 23 07:09:06 2014
@@ -1145,6 +1145,23 @@ entry:
ret <4 x float> %vecinit3
}
+define <4 x float> @insertps_10(<4 x float> %A)
+{
+; X32-LABEL: insertps_10:
+; X32: ## BB#0:
+; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[0],zero
+; X32-NEXT: retl
+;
+; X64-LABEL: insertps_10:
+; X64: ## BB#0:
+; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[0],zero
+; X64-NEXT: retq
+ %vecext = extractelement <4 x float> %A, i32 0
+ %vecbuild1 = insertelement <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float %vecext, i32 0
+ %vecbuild2 = insertelement <4 x float> %vecbuild1, float %vecext, i32 2
+ ret <4 x float> %vecbuild2
+}
+
define <4 x float> @build_vector_to_shuffle_1(<4 x float> %A) {
; X32-LABEL: build_vector_to_shuffle_1:
; X32: ## BB#0:
More information about the llvm-commits mailing list