[llvm] r346908 - [X86] Support v2i32/v4i16/v8i8 load/store using f64 on 32-bit targets under -x86-experimental-vector-widening-legalization.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 14 15:02:09 PST 2018
Author: ctopper
Date: Wed Nov 14 15:02:09 2018
New Revision: 346908
URL: http://llvm.org/viewvc/llvm-project?rev=346908&view=rev
Log:
[X86] Support v2i32/v4i16/v8i8 load/store using f64 on 32-bit targets under -x86-experimental-vector-widening-legalization.
On 64-bit targets the type legalizer will use i64 to legalize these. But when i64 isn't legal, the type legalizer won't try an FP type. So do it manually instead.
There are a few regressions in here due to some v2i32 operations like mul and div now being reassembled into a full vector just so they can be stored, instead of storing the pieces directly. But this was already occurring in 64-bit mode, so it's not a new issue.
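For readers skimming the diff below, the store path boils down to the following sketch. It is a condensed, non-authoritative restatement of the LowerStore change, assuming St, StoredVal, Subtarget, DAG and dl are in scope as in X86ISelLowering.cpp, and with the trailing store arguments abbreviated:

  // StoredVal is the 64-bit vector (v2i32/v4i16/v8i8/v2f32) being stored.
  MVT StoreVT = StoredVal.getSimpleValueType();

  // Widen to a 128-bit vector by concatenating with undef.
  MVT WideVT = MVT::getVectorVT(StoreVT.getVectorElementType(),
                                StoreVT.getVectorNumElements() * 2);
  StoredVal = DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, StoredVal,
                          DAG.getUNDEF(StoreVT));

  // Use i64 only where it is legal (64-bit targets); otherwise fall back to
  // f64 so the access is still a single 64-bit store on 32-bit targets.
  MVT StVT = Subtarget.is64Bit() && StoreVT.isInteger() ? MVT::i64 : MVT::f64;
  StoredVal = DAG.getBitcast(MVT::getVectorVT(StVT, 2), StoredVal);
  StoredVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, StVT, StoredVal,
                          DAG.getIntPtrConstant(0, dl));

  // Emit one scalar 64-bit store of element 0.
  return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
                      St->getMemOperand());

The load path in ReplaceNodeResults does the reverse: a scalar f64/i64 load, SCALAR_TO_VECTOR into a two-element 64-bit vector, then a bitcast back to the widened integer type.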
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/shrink_vmul-widen.ll
llvm/trunk/test/CodeGen/X86/vector-idiv-v2i32.ll
llvm/trunk/test/CodeGen/X86/widen_cast-4.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=346908&r1=346907&r2=346908&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Nov 14 15:02:09 2018
@@ -905,7 +905,13 @@ X86TargetLowering::X86TargetLowering(con
// 64-bit targets and two 32-bit loads on a 32-bit target. Similar for
// store.
setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
+ setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
+ setOperationAction(ISD::LOAD, MVT::v4i16, Custom);
+ setOperationAction(ISD::LOAD, MVT::v8i8, Custom);
setOperationAction(ISD::STORE, MVT::v2f32, Custom);
+ setOperationAction(ISD::STORE, MVT::v2i32, Custom);
+ setOperationAction(ISD::STORE, MVT::v4i16, Custom);
+ setOperationAction(ISD::STORE, MVT::v8i8, Custom);
setOperationAction(ISD::BITCAST, MVT::v2i32, Custom);
setOperationAction(ISD::BITCAST, MVT::v4i16, Custom);
@@ -20073,14 +20079,24 @@ static SDValue LowerStore(SDValue Op, co
if (St->isTruncatingStore())
return SDValue();
- assert(StoredVal.getValueType() == MVT::v2f32 && "Unexpected VT");
+ MVT StoreVT = StoredVal.getSimpleValueType();
+ assert(StoreVT.isVector() && StoreVT.getSizeInBits() == 64 &&
+ "Unexpected VT");
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (DAG.getTargetLoweringInfo().getTypeAction(*DAG.getContext(), StoreVT) !=
+ TargetLowering::TypeWidenVector)
+ return SDValue();
- // Widen the vector, cast to a v2x64 type, extract the single 64-bit
- // element and store it.
- StoredVal = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, StoredVal,
- DAG.getUNDEF(MVT::v2f32));
- StoredVal = DAG.getBitcast(MVT::v2f64, StoredVal);
- StoredVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, StoredVal,
+ // Widen the vector, cast to a v2x64 type, extract the single 64-bit element
+ // and store it.
+ MVT WideVT = MVT::getVectorVT(StoreVT.getVectorElementType(),
+ StoreVT.getVectorNumElements() * 2);
+ StoredVal = DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, StoredVal,
+ DAG.getUNDEF(StoreVT));
+ MVT StVT = Subtarget.is64Bit() && StoreVT.isInteger() ? MVT::i64 : MVT::f64;
+ MVT CastVT = MVT::getVectorVT(StVT, 2);
+ StoredVal = DAG.getBitcast(CastVT, StoredVal);
+ StoredVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, StVT, StoredVal,
DAG.getIntPtrConstant(0, dl));
return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
@@ -26567,20 +26583,27 @@ void X86TargetLowering::ReplaceNodeResul
break;
}
case ISD::LOAD: {
- // Use an f64 load and a scalar_to_vector for v2f32 loads. This avoids
- // scalarizing in 32-bit mode. In 64-bit mode this avoids a int->fp cast
- // since type legalization will try to use an i64 load.
- assert(N->getValueType(0) == MVT::v2f32 && "Unexpected VT");
+ // Use an f64/i64 load and a scalar_to_vector for v2f32/v2i32 loads. This
+ // avoids scalarizing in 32-bit mode. In 64-bit mode this avoids a int->fp
+ // cast since type legalization will try to use an i64 load.
+ MVT VT = N->getSimpleValueType(0);
+ assert(VT.isVector() && VT.getSizeInBits() == 64 && "Unexpected VT");
+ if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
+ return;
if (!ISD::isNON_EXTLoad(N))
return;
auto *Ld = cast<LoadSDNode>(N);
- SDValue Res = DAG.getLoad(MVT::f64, dl, Ld->getChain(), Ld->getBasePtr(),
+ MVT LdVT = Subtarget.is64Bit() && VT.isInteger() ? MVT::i64 : MVT::f64;
+ SDValue Res = DAG.getLoad(LdVT, dl, Ld->getChain(), Ld->getBasePtr(),
Ld->getPointerInfo(),
Ld->getAlignment(),
Ld->getMemOperand()->getFlags());
SDValue Chain = Res.getValue(1);
- Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f64, Res);
- Res = DAG.getBitcast(MVT::v4f32, Res);
+ MVT WideVT = MVT::getVectorVT(LdVT, 2);
+ Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, WideVT, Res);
+ MVT CastVT = MVT::getVectorVT(VT.getVectorElementType(),
+ VT.getVectorNumElements() * 2);
+ Res = DAG.getBitcast(CastVT, Res);
Results.push_back(Res);
Results.push_back(Chain);
return;
Modified: llvm/trunk/test/CodeGen/X86/shrink_vmul-widen.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/shrink_vmul-widen.ll?rev=346908&r1=346907&r2=346908&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/shrink_vmul-widen.ll (original)
+++ llvm/trunk/test/CodeGen/X86/shrink_vmul-widen.ll Wed Nov 14 15:02:09 2018
@@ -31,9 +31,7 @@ define void @mul_2xi8(i8* nocapture read
; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; X86-SSE-NEXT: pmullw %xmm0, %xmm1
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
-; X86-SSE-NEXT: movd %xmm1, (%esi,%ecx,4)
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; X86-SSE-NEXT: movd %xmm0, 4(%esi,%ecx,4)
+; X86-SSE-NEXT: movq %xmm1, (%esi,%ecx,4)
; X86-SSE-NEXT: popl %esi
; X86-SSE-NEXT: retl
;
@@ -54,8 +52,7 @@ define void @mul_2xi8(i8* nocapture read
; X86-AVX-NEXT: vmovd %eax, %xmm1
; X86-AVX-NEXT: vpinsrd $1, %edx, %xmm1, %xmm1
; X86-AVX-NEXT: vpmulld %xmm0, %xmm1, %xmm0
-; X86-AVX-NEXT: vpextrd $1, %xmm0, 4(%esi,%ecx,4)
-; X86-AVX-NEXT: vmovd %xmm0, (%esi,%ecx,4)
+; X86-AVX-NEXT: vmovq %xmm0, (%esi,%ecx,4)
; X86-AVX-NEXT: popl %esi
; X86-AVX-NEXT: popl %edi
; X86-AVX-NEXT: retl
@@ -206,10 +203,10 @@ define void @mul_8xi8(i8* nocapture read
; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; X86-SSE-NEXT: pmullw %xmm0, %xmm1
; X86-SSE-NEXT: movdqa %xmm1, %xmm0
-; X86-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
-; X86-SSE-NEXT: movdqu %xmm1, (%esi,%ecx,4)
-; X86-SSE-NEXT: movdqu %xmm0, 16(%esi,%ecx,4)
+; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; X86-SSE-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; X86-SSE-NEXT: movdqu %xmm1, 16(%esi,%ecx,4)
+; X86-SSE-NEXT: movdqu %xmm0, (%esi,%ecx,4)
; X86-SSE-NEXT: popl %esi
; X86-SSE-NEXT: retl
;
@@ -485,9 +482,7 @@ define void @mul_2xi16(i8* nocapture rea
; X86-SSE-NEXT: pmulhuw %xmm0, %xmm2
; X86-SSE-NEXT: pmullw %xmm0, %xmm1
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
-; X86-SSE-NEXT: movd %xmm1, (%esi,%ecx,4)
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; X86-SSE-NEXT: movd %xmm0, 4(%esi,%ecx,4)
+; X86-SSE-NEXT: movq %xmm1, (%esi,%ecx,4)
; X86-SSE-NEXT: popl %esi
; X86-SSE-NEXT: retl
;
@@ -503,8 +498,7 @@ define void @mul_2xi16(i8* nocapture rea
; X86-AVX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-AVX-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; X86-AVX-NEXT: vpmulld %xmm0, %xmm1, %xmm0
-; X86-AVX-NEXT: vpextrd $1, %xmm0, 4(%esi,%ecx,4)
-; X86-AVX-NEXT: vmovd %xmm0, (%esi,%ecx,4)
+; X86-AVX-NEXT: vmovq %xmm0, (%esi,%ecx,4)
; X86-AVX-NEXT: popl %esi
; X86-AVX-NEXT: retl
;
@@ -923,9 +917,7 @@ define void @mul_2xi8_sext(i8* nocapture
; X86-SSE-NEXT: pmullw %xmm0, %xmm1
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X86-SSE-NEXT: psrad $16, %xmm0
-; X86-SSE-NEXT: movd %xmm0, (%esi,%ecx,4)
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; X86-SSE-NEXT: movd %xmm0, 4(%esi,%ecx,4)
+; X86-SSE-NEXT: movq %xmm0, (%esi,%ecx,4)
; X86-SSE-NEXT: popl %esi
; X86-SSE-NEXT: retl
;
@@ -946,8 +938,7 @@ define void @mul_2xi8_sext(i8* nocapture
; X86-AVX-NEXT: vmovd %eax, %xmm1
; X86-AVX-NEXT: vpinsrd $1, %edx, %xmm1, %xmm1
; X86-AVX-NEXT: vpmulld %xmm0, %xmm1, %xmm0
-; X86-AVX-NEXT: vpextrd $1, %xmm0, 4(%esi,%ecx,4)
-; X86-AVX-NEXT: vmovd %xmm0, (%esi,%ecx,4)
+; X86-AVX-NEXT: vmovq %xmm0, (%esi,%ecx,4)
; X86-AVX-NEXT: popl %esi
; X86-AVX-NEXT: popl %edi
; X86-AVX-NEXT: retl
@@ -1026,9 +1017,7 @@ define void @mul_2xi8_sext_zext(i8* noca
; X86-SSE-NEXT: pmulhw %xmm0, %xmm2
; X86-SSE-NEXT: pmullw %xmm1, %xmm0
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; X86-SSE-NEXT: movd %xmm0, (%esi,%ecx,4)
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; X86-SSE-NEXT: movd %xmm0, 4(%esi,%ecx,4)
+; X86-SSE-NEXT: movq %xmm0, (%esi,%ecx,4)
; X86-SSE-NEXT: popl %esi
; X86-SSE-NEXT: retl
;
@@ -1049,8 +1038,7 @@ define void @mul_2xi8_sext_zext(i8* noca
; X86-AVX-NEXT: vmovd %eax, %xmm1
; X86-AVX-NEXT: vpinsrd $1, %edx, %xmm1, %xmm1
; X86-AVX-NEXT: vpmulld %xmm0, %xmm1, %xmm0
-; X86-AVX-NEXT: vpextrd $1, %xmm0, 4(%esi,%ecx,4)
-; X86-AVX-NEXT: vmovd %xmm0, (%esi,%ecx,4)
+; X86-AVX-NEXT: vmovq %xmm0, (%esi,%ecx,4)
; X86-AVX-NEXT: popl %esi
; X86-AVX-NEXT: popl %edi
; X86-AVX-NEXT: retl
@@ -1124,9 +1112,7 @@ define void @mul_2xi16_sext(i8* nocaptur
; X86-SSE-NEXT: pmulhw %xmm0, %xmm2
; X86-SSE-NEXT: pmullw %xmm0, %xmm1
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
-; X86-SSE-NEXT: movd %xmm1, (%esi,%ecx,4)
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; X86-SSE-NEXT: movd %xmm0, 4(%esi,%ecx,4)
+; X86-SSE-NEXT: movq %xmm1, (%esi,%ecx,4)
; X86-SSE-NEXT: popl %esi
; X86-SSE-NEXT: retl
;
@@ -1147,8 +1133,7 @@ define void @mul_2xi16_sext(i8* nocaptur
; X86-AVX-NEXT: vmovd %eax, %xmm1
; X86-AVX-NEXT: vpinsrd $1, %edx, %xmm1, %xmm1
; X86-AVX-NEXT: vpmulld %xmm0, %xmm1, %xmm0
-; X86-AVX-NEXT: vpextrd $1, %xmm0, 4(%esi,%ecx,4)
-; X86-AVX-NEXT: vmovd %xmm0, (%esi,%ecx,4)
+; X86-AVX-NEXT: vmovq %xmm0, (%esi,%ecx,4)
; X86-AVX-NEXT: popl %esi
; X86-AVX-NEXT: popl %edi
; X86-AVX-NEXT: retl
@@ -1217,11 +1202,12 @@ define void @mul_2xi16_sext_zext(i8* noc
; X86-SSE-NEXT: pxor %xmm2, %xmm2
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
-; X86-SSE-NEXT: pmuludq %xmm2, %xmm3
; X86-SSE-NEXT: pmuludq %xmm0, %xmm1
-; X86-SSE-NEXT: movd %xmm1, (%esi,%ecx,4)
-; X86-SSE-NEXT: movd %xmm3, 4(%esi,%ecx,4)
+; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; X86-SSE-NEXT: pmuludq %xmm2, %xmm0
+; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; X86-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X86-SSE-NEXT: movq %xmm1, (%esi,%ecx,4)
; X86-SSE-NEXT: popl %esi
; X86-SSE-NEXT: retl
;
@@ -1240,8 +1226,7 @@ define void @mul_2xi16_sext_zext(i8* noc
; X86-AVX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-AVX-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; X86-AVX-NEXT: vpmulld %xmm0, %xmm1, %xmm0
-; X86-AVX-NEXT: vpextrd $1, %xmm0, 4(%esi,%ecx,4)
-; X86-AVX-NEXT: vmovd %xmm0, (%esi,%ecx,4)
+; X86-AVX-NEXT: vmovq %xmm0, (%esi,%ecx,4)
; X86-AVX-NEXT: popl %esi
; X86-AVX-NEXT: popl %edi
; X86-AVX-NEXT: retl
@@ -1470,9 +1455,7 @@ define void @mul_2xi8_varconst1(i8* noca
; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X86-SSE-NEXT: pmullw {{\.LCPI.*}}, %xmm0
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; X86-SSE-NEXT: movd %xmm0, (%edx,%eax,4)
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; X86-SSE-NEXT: movd %xmm0, 4(%edx,%eax,4)
+; X86-SSE-NEXT: movq %xmm0, (%edx,%eax,4)
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: mul_2xi8_varconst1:
@@ -1488,8 +1471,7 @@ define void @mul_2xi8_varconst1(i8* noca
; X86-AVX-NEXT: vmovd %ecx, %xmm0
; X86-AVX-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0
; X86-AVX-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
-; X86-AVX-NEXT: vpextrd $1, %xmm0, 4(%edx,%eax,4)
-; X86-AVX-NEXT: vmovd %xmm0, (%edx,%eax,4)
+; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4)
; X86-AVX-NEXT: popl %esi
; X86-AVX-NEXT: .cfi_def_cfa_offset 4
; X86-AVX-NEXT: retl
@@ -1547,9 +1529,7 @@ define void @mul_2xi8_varconst2(i8* noca
; X86-SSE-NEXT: pmullw {{\.LCPI.*}}, %xmm0
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; X86-SSE-NEXT: psrad $16, %xmm0
-; X86-SSE-NEXT: movd %xmm0, (%edx,%eax,4)
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; X86-SSE-NEXT: movd %xmm0, 4(%edx,%eax,4)
+; X86-SSE-NEXT: movq %xmm0, (%edx,%eax,4)
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: mul_2xi8_varconst2:
@@ -1565,8 +1545,7 @@ define void @mul_2xi8_varconst2(i8* noca
; X86-AVX-NEXT: vmovd %ecx, %xmm0
; X86-AVX-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0
; X86-AVX-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
-; X86-AVX-NEXT: vpextrd $1, %xmm0, 4(%edx,%eax,4)
-; X86-AVX-NEXT: vmovd %xmm0, (%edx,%eax,4)
+; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4)
; X86-AVX-NEXT: popl %esi
; X86-AVX-NEXT: .cfi_def_cfa_offset 4
; X86-AVX-NEXT: retl
@@ -1627,9 +1606,7 @@ define void @mul_2xi8_varconst3(i8* noca
; X86-SSE-NEXT: pmulhw %xmm1, %xmm2
; X86-SSE-NEXT: pmullw %xmm1, %xmm0
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; X86-SSE-NEXT: movd %xmm0, (%edx,%eax,4)
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; X86-SSE-NEXT: movd %xmm0, 4(%edx,%eax,4)
+; X86-SSE-NEXT: movq %xmm0, (%edx,%eax,4)
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: mul_2xi8_varconst3:
@@ -1645,8 +1622,7 @@ define void @mul_2xi8_varconst3(i8* noca
; X86-AVX-NEXT: vmovd %ecx, %xmm0
; X86-AVX-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0
; X86-AVX-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
-; X86-AVX-NEXT: vpextrd $1, %xmm0, 4(%edx,%eax,4)
-; X86-AVX-NEXT: vmovd %xmm0, (%edx,%eax,4)
+; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4)
; X86-AVX-NEXT: popl %esi
; X86-AVX-NEXT: .cfi_def_cfa_offset 4
; X86-AVX-NEXT: retl
@@ -1709,9 +1685,7 @@ define void @mul_2xi8_varconst4(i8* noca
; X86-SSE-NEXT: pmulhw %xmm1, %xmm2
; X86-SSE-NEXT: pmullw %xmm1, %xmm0
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; X86-SSE-NEXT: movd %xmm0, (%edx,%eax,4)
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; X86-SSE-NEXT: movd %xmm0, 4(%edx,%eax,4)
+; X86-SSE-NEXT: movq %xmm0, (%edx,%eax,4)
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: mul_2xi8_varconst4:
@@ -1727,8 +1701,7 @@ define void @mul_2xi8_varconst4(i8* noca
; X86-AVX-NEXT: vmovd %ecx, %xmm0
; X86-AVX-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0
; X86-AVX-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
-; X86-AVX-NEXT: vpextrd $1, %xmm0, 4(%edx,%eax,4)
-; X86-AVX-NEXT: vmovd %xmm0, (%edx,%eax,4)
+; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4)
; X86-AVX-NEXT: popl %esi
; X86-AVX-NEXT: .cfi_def_cfa_offset 4
; X86-AVX-NEXT: retl
@@ -1791,9 +1764,7 @@ define void @mul_2xi8_varconst5(i8* noca
; X86-SSE-NEXT: pmulhw %xmm1, %xmm2
; X86-SSE-NEXT: pmullw %xmm1, %xmm0
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; X86-SSE-NEXT: movd %xmm0, (%edx,%eax,4)
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; X86-SSE-NEXT: movd %xmm0, 4(%edx,%eax,4)
+; X86-SSE-NEXT: movq %xmm0, (%edx,%eax,4)
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: mul_2xi8_varconst5:
@@ -1809,8 +1780,7 @@ define void @mul_2xi8_varconst5(i8* noca
; X86-AVX-NEXT: vmovd %ecx, %xmm0
; X86-AVX-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0
; X86-AVX-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
-; X86-AVX-NEXT: vpextrd $1, %xmm0, 4(%edx,%eax,4)
-; X86-AVX-NEXT: vmovd %xmm0, (%edx,%eax,4)
+; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4)
; X86-AVX-NEXT: popl %esi
; X86-AVX-NEXT: .cfi_def_cfa_offset 4
; X86-AVX-NEXT: retl
@@ -1873,9 +1843,7 @@ define void @mul_2xi8_varconst6(i8* noca
; X86-SSE-NEXT: pmulhw %xmm1, %xmm2
; X86-SSE-NEXT: pmullw %xmm1, %xmm0
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; X86-SSE-NEXT: movd %xmm0, (%edx,%eax,4)
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; X86-SSE-NEXT: movd %xmm0, 4(%edx,%eax,4)
+; X86-SSE-NEXT: movq %xmm0, (%edx,%eax,4)
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: mul_2xi8_varconst6:
@@ -1891,8 +1859,7 @@ define void @mul_2xi8_varconst6(i8* noca
; X86-AVX-NEXT: vmovd %ecx, %xmm0
; X86-AVX-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0
; X86-AVX-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
-; X86-AVX-NEXT: vpextrd $1, %xmm0, 4(%edx,%eax,4)
-; X86-AVX-NEXT: vmovd %xmm0, (%edx,%eax,4)
+; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4)
; X86-AVX-NEXT: popl %esi
; X86-AVX-NEXT: .cfi_def_cfa_offset 4
; X86-AVX-NEXT: retl
@@ -1952,9 +1919,7 @@ define void @mul_2xi16_varconst1(i8* noc
; X86-SSE-NEXT: pmulhuw %xmm1, %xmm2
; X86-SSE-NEXT: pmullw %xmm1, %xmm0
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; X86-SSE-NEXT: movd %xmm0, (%edx,%eax,4)
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; X86-SSE-NEXT: movd %xmm0, 4(%edx,%eax,4)
+; X86-SSE-NEXT: movq %xmm0, (%edx,%eax,4)
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: mul_2xi16_varconst1:
@@ -1965,8 +1930,7 @@ define void @mul_2xi16_varconst1(i8* noc
; X86-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X86-AVX-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
-; X86-AVX-NEXT: vpextrd $1, %xmm0, 4(%edx,%eax,4)
-; X86-AVX-NEXT: vmovd %xmm0, (%edx,%eax,4)
+; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4)
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: mul_2xi16_varconst1:
@@ -2019,9 +1983,7 @@ define void @mul_2xi16_varconst2(i8* noc
; X86-SSE-NEXT: pmulhw %xmm1, %xmm2
; X86-SSE-NEXT: pmullw %xmm1, %xmm0
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; X86-SSE-NEXT: movd %xmm0, (%edx,%eax,4)
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; X86-SSE-NEXT: movd %xmm0, 4(%edx,%eax,4)
+; X86-SSE-NEXT: movq %xmm0, (%edx,%eax,4)
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: mul_2xi16_varconst2:
@@ -2037,8 +1999,7 @@ define void @mul_2xi16_varconst2(i8* noc
; X86-AVX-NEXT: vmovd %ecx, %xmm0
; X86-AVX-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0
; X86-AVX-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
-; X86-AVX-NEXT: vpextrd $1, %xmm0, 4(%edx,%eax,4)
-; X86-AVX-NEXT: vmovd %xmm0, (%edx,%eax,4)
+; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4)
; X86-AVX-NEXT: popl %esi
; X86-AVX-NEXT: .cfi_def_cfa_offset 4
; X86-AVX-NEXT: retl
@@ -2092,13 +2053,14 @@ define void @mul_2xi16_varconst3(i8* noc
; X86-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: pxor %xmm1, %xmm1
; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; X86-SSE-NEXT: movdqa {{.*#+}} xmm2 = <0,65536,u,u>
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
-; X86-SSE-NEXT: pmuludq %xmm1, %xmm3
-; X86-SSE-NEXT: pmuludq %xmm2, %xmm0
-; X86-SSE-NEXT: movd %xmm0, (%edx,%eax,4)
-; X86-SSE-NEXT: movd %xmm3, 4(%edx,%eax,4)
+; X86-SSE-NEXT: movdqa {{.*#+}} xmm1 = <0,65536,u,u>
+; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; X86-SSE-NEXT: pmuludq %xmm1, %xmm0
+; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; X86-SSE-NEXT: pmuludq %xmm2, %xmm1
+; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; X86-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X86-SSE-NEXT: movq %xmm0, (%edx,%eax,4)
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: mul_2xi16_varconst3:
@@ -2109,8 +2071,7 @@ define void @mul_2xi16_varconst3(i8* noc
; X86-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X86-AVX-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
-; X86-AVX-NEXT: vpextrd $1, %xmm0, 4(%edx,%eax,4)
-; X86-AVX-NEXT: vmovd %xmm0, (%edx,%eax,4)
+; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4)
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: mul_2xi16_varconst3:
@@ -2164,13 +2125,14 @@ define void @mul_2xi16_varconst4(i8* noc
; X86-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7]
; X86-SSE-NEXT: psrad $16, %xmm0
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; X86-SSE-NEXT: movdqa {{.*#+}} xmm2 = <0,32768,u,u>
-; X86-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
-; X86-SSE-NEXT: pmuludq %xmm1, %xmm3
-; X86-SSE-NEXT: pmuludq %xmm2, %xmm0
-; X86-SSE-NEXT: movd %xmm0, (%edx,%eax,4)
-; X86-SSE-NEXT: movd %xmm3, 4(%edx,%eax,4)
+; X86-SSE-NEXT: movdqa {{.*#+}} xmm1 = <0,32768,u,u>
+; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; X86-SSE-NEXT: pmuludq %xmm1, %xmm0
+; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; X86-SSE-NEXT: pmuludq %xmm2, %xmm1
+; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; X86-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X86-SSE-NEXT: movq %xmm0, (%edx,%eax,4)
; X86-SSE-NEXT: retl
;
; X86-AVX-LABEL: mul_2xi16_varconst4:
@@ -2186,8 +2148,7 @@ define void @mul_2xi16_varconst4(i8* noc
; X86-AVX-NEXT: vmovd %ecx, %xmm0
; X86-AVX-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0
; X86-AVX-NEXT: vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
-; X86-AVX-NEXT: vpextrd $1, %xmm0, 4(%edx,%eax,4)
-; X86-AVX-NEXT: vmovd %xmm0, (%edx,%eax,4)
+; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4)
; X86-AVX-NEXT: popl %esi
; X86-AVX-NEXT: .cfi_def_cfa_offset 4
; X86-AVX-NEXT: retl
Modified: llvm/trunk/test/CodeGen/X86/vector-idiv-v2i32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-idiv-v2i32.ll?rev=346908&r1=346907&r2=346908&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-idiv-v2i32.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-idiv-v2i32.ll Wed Nov 14 15:02:09 2018
@@ -88,31 +88,33 @@ define void @test_udiv7_v2i32(<2 x i32>*
;
; X86_WIDEN-LABEL: test_udiv7_v2i32:
; X86_WIDEN: # %bb.0:
-; X86_WIDEN-NEXT: pushl %ebx
; X86_WIDEN-NEXT: pushl %edi
; X86_WIDEN-NEXT: pushl %esi
-; X86_WIDEN-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86_WIDEN-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86_WIDEN-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86_WIDEN-NEXT: movl (%eax), %ecx
-; X86_WIDEN-NEXT: movl 4(%eax), %esi
-; X86_WIDEN-NEXT: movl $613566757, %ebx # imm = 0x24924925
+; X86_WIDEN-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; X86_WIDEN-NEXT: movd %xmm0, %ecx
+; X86_WIDEN-NEXT: movl $613566757, %edi # imm = 0x24924925
; X86_WIDEN-NEXT: movl %ecx, %eax
-; X86_WIDEN-NEXT: mull %ebx
+; X86_WIDEN-NEXT: mull %edi
; X86_WIDEN-NEXT: subl %edx, %ecx
; X86_WIDEN-NEXT: shrl %ecx
; X86_WIDEN-NEXT: addl %edx, %ecx
; X86_WIDEN-NEXT: shrl $2, %ecx
-; X86_WIDEN-NEXT: movl %esi, %eax
-; X86_WIDEN-NEXT: mull %ebx
-; X86_WIDEN-NEXT: subl %edx, %esi
-; X86_WIDEN-NEXT: shrl %esi
-; X86_WIDEN-NEXT: addl %edx, %esi
-; X86_WIDEN-NEXT: shrl $2, %esi
-; X86_WIDEN-NEXT: movl %esi, 4(%edi)
-; X86_WIDEN-NEXT: movl %ecx, (%edi)
+; X86_WIDEN-NEXT: movd %ecx, %xmm1
+; X86_WIDEN-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; X86_WIDEN-NEXT: movd %xmm0, %ecx
+; X86_WIDEN-NEXT: movl %ecx, %eax
+; X86_WIDEN-NEXT: mull %edi
+; X86_WIDEN-NEXT: subl %edx, %ecx
+; X86_WIDEN-NEXT: shrl %ecx
+; X86_WIDEN-NEXT: addl %edx, %ecx
+; X86_WIDEN-NEXT: shrl $2, %ecx
+; X86_WIDEN-NEXT: movd %ecx, %xmm0
+; X86_WIDEN-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X86_WIDEN-NEXT: movq %xmm1, (%esi)
; X86_WIDEN-NEXT: popl %esi
; X86_WIDEN-NEXT: popl %edi
-; X86_WIDEN-NEXT: popl %ebx
; X86_WIDEN-NEXT: retl
%a = load <2 x i32>, <2 x i32>* %x
%b = udiv <2 x i32> %a, <i32 7, i32 7>
@@ -230,27 +232,28 @@ define void @test_urem7_v2i32(<2 x i32>*
;
; X86_WIDEN-LABEL: test_urem7_v2i32:
; X86_WIDEN: # %bb.0:
-; X86_WIDEN-NEXT: pushl %ebp
-; X86_WIDEN-NEXT: pushl %ebx
; X86_WIDEN-NEXT: pushl %edi
; X86_WIDEN-NEXT: pushl %esi
-; X86_WIDEN-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86_WIDEN-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86_WIDEN-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86_WIDEN-NEXT: movl (%eax), %esi
-; X86_WIDEN-NEXT: movl 4(%eax), %ecx
-; X86_WIDEN-NEXT: movl $613566757, %ebx # imm = 0x24924925
-; X86_WIDEN-NEXT: movl %esi, %eax
-; X86_WIDEN-NEXT: mull %ebx
-; X86_WIDEN-NEXT: movl %esi, %ebp
-; X86_WIDEN-NEXT: subl %edx, %ebp
-; X86_WIDEN-NEXT: shrl %ebp
-; X86_WIDEN-NEXT: addl %edx, %ebp
-; X86_WIDEN-NEXT: shrl $2, %ebp
-; X86_WIDEN-NEXT: leal (,%ebp,8), %eax
-; X86_WIDEN-NEXT: subl %eax, %ebp
-; X86_WIDEN-NEXT: addl %esi, %ebp
+; X86_WIDEN-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; X86_WIDEN-NEXT: movd %xmm0, %ecx
+; X86_WIDEN-NEXT: movl $613566757, %edi # imm = 0x24924925
+; X86_WIDEN-NEXT: movl %ecx, %eax
+; X86_WIDEN-NEXT: mull %edi
+; X86_WIDEN-NEXT: movl %ecx, %eax
+; X86_WIDEN-NEXT: subl %edx, %eax
+; X86_WIDEN-NEXT: shrl %eax
+; X86_WIDEN-NEXT: addl %edx, %eax
+; X86_WIDEN-NEXT: shrl $2, %eax
+; X86_WIDEN-NEXT: leal (,%eax,8), %edx
+; X86_WIDEN-NEXT: subl %edx, %eax
+; X86_WIDEN-NEXT: addl %ecx, %eax
+; X86_WIDEN-NEXT: movd %eax, %xmm1
+; X86_WIDEN-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; X86_WIDEN-NEXT: movd %xmm0, %ecx
; X86_WIDEN-NEXT: movl %ecx, %eax
-; X86_WIDEN-NEXT: mull %ebx
+; X86_WIDEN-NEXT: mull %edi
; X86_WIDEN-NEXT: movl %ecx, %eax
; X86_WIDEN-NEXT: subl %edx, %eax
; X86_WIDEN-NEXT: shrl %eax
@@ -259,12 +262,11 @@ define void @test_urem7_v2i32(<2 x i32>*
; X86_WIDEN-NEXT: leal (,%eax,8), %edx
; X86_WIDEN-NEXT: subl %edx, %eax
; X86_WIDEN-NEXT: addl %ecx, %eax
-; X86_WIDEN-NEXT: movl %eax, 4(%edi)
-; X86_WIDEN-NEXT: movl %ebp, (%edi)
+; X86_WIDEN-NEXT: movd %eax, %xmm0
+; X86_WIDEN-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X86_WIDEN-NEXT: movq %xmm1, (%esi)
; X86_WIDEN-NEXT: popl %esi
; X86_WIDEN-NEXT: popl %edi
-; X86_WIDEN-NEXT: popl %ebx
-; X86_WIDEN-NEXT: popl %ebp
; X86_WIDEN-NEXT: retl
%a = load <2 x i32>, <2 x i32>* %x
%b = urem <2 x i32> %a, <i32 7, i32 7>
@@ -369,36 +371,37 @@ define void @test_sdiv7_v2i32(<2 x i32>*
;
; X86_WIDEN-LABEL: test_sdiv7_v2i32:
; X86_WIDEN: # %bb.0:
-; X86_WIDEN-NEXT: pushl %ebp
; X86_WIDEN-NEXT: pushl %ebx
; X86_WIDEN-NEXT: pushl %edi
; X86_WIDEN-NEXT: pushl %esi
-; X86_WIDEN-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86_WIDEN-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86_WIDEN-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86_WIDEN-NEXT: movl (%eax), %ecx
-; X86_WIDEN-NEXT: movl 4(%eax), %esi
-; X86_WIDEN-NEXT: movl $-1840700269, %ebp # imm = 0x92492493
-; X86_WIDEN-NEXT: movl %ecx, %eax
-; X86_WIDEN-NEXT: imull %ebp
-; X86_WIDEN-NEXT: movl %edx, %edi
-; X86_WIDEN-NEXT: addl %ecx, %edi
-; X86_WIDEN-NEXT: movl %edi, %eax
-; X86_WIDEN-NEXT: shrl $31, %eax
-; X86_WIDEN-NEXT: sarl $2, %edi
-; X86_WIDEN-NEXT: addl %eax, %edi
+; X86_WIDEN-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; X86_WIDEN-NEXT: movd %xmm0, %ecx
+; X86_WIDEN-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; X86_WIDEN-NEXT: movd %xmm0, %esi
+; X86_WIDEN-NEXT: movl $-1840700269, %ebx # imm = 0x92492493
; X86_WIDEN-NEXT: movl %esi, %eax
-; X86_WIDEN-NEXT: imull %ebp
+; X86_WIDEN-NEXT: imull %ebx
; X86_WIDEN-NEXT: addl %esi, %edx
; X86_WIDEN-NEXT: movl %edx, %eax
; X86_WIDEN-NEXT: shrl $31, %eax
; X86_WIDEN-NEXT: sarl $2, %edx
; X86_WIDEN-NEXT: addl %eax, %edx
-; X86_WIDEN-NEXT: movl %edx, 4(%ebx)
-; X86_WIDEN-NEXT: movl %edi, (%ebx)
+; X86_WIDEN-NEXT: movd %edx, %xmm0
+; X86_WIDEN-NEXT: movl %ecx, %eax
+; X86_WIDEN-NEXT: imull %ebx
+; X86_WIDEN-NEXT: addl %ecx, %edx
+; X86_WIDEN-NEXT: movl %edx, %eax
+; X86_WIDEN-NEXT: shrl $31, %eax
+; X86_WIDEN-NEXT: sarl $2, %edx
+; X86_WIDEN-NEXT: addl %eax, %edx
+; X86_WIDEN-NEXT: movd %edx, %xmm1
+; X86_WIDEN-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X86_WIDEN-NEXT: movq %xmm1, (%edi)
; X86_WIDEN-NEXT: popl %esi
; X86_WIDEN-NEXT: popl %edi
; X86_WIDEN-NEXT: popl %ebx
-; X86_WIDEN-NEXT: popl %ebp
; X86_WIDEN-NEXT: retl
%a = load <2 x i32>, <2 x i32>* %x
%b = sdiv <2 x i32> %a, <i32 7, i32 7>
@@ -521,28 +524,29 @@ define void @test_srem7_v2i32(<2 x i32>*
;
; X86_WIDEN-LABEL: test_srem7_v2i32:
; X86_WIDEN: # %bb.0:
-; X86_WIDEN-NEXT: pushl %ebp
; X86_WIDEN-NEXT: pushl %ebx
; X86_WIDEN-NEXT: pushl %edi
; X86_WIDEN-NEXT: pushl %esi
-; X86_WIDEN-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86_WIDEN-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86_WIDEN-NEXT: movl (%eax), %edi
-; X86_WIDEN-NEXT: movl 4(%eax), %ecx
-; X86_WIDEN-NEXT: movl $-1840700269, %ebp # imm = 0x92492493
-; X86_WIDEN-NEXT: movl %edi, %eax
-; X86_WIDEN-NEXT: imull %ebp
-; X86_WIDEN-NEXT: movl %edx, %esi
-; X86_WIDEN-NEXT: addl %edi, %esi
+; X86_WIDEN-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; X86_WIDEN-NEXT: movd %xmm0, %ecx
+; X86_WIDEN-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; X86_WIDEN-NEXT: movd %xmm0, %esi
+; X86_WIDEN-NEXT: movl $-1840700269, %edi # imm = 0x92492493
; X86_WIDEN-NEXT: movl %esi, %eax
+; X86_WIDEN-NEXT: imull %edi
+; X86_WIDEN-NEXT: addl %esi, %edx
+; X86_WIDEN-NEXT: movl %edx, %eax
; X86_WIDEN-NEXT: shrl $31, %eax
-; X86_WIDEN-NEXT: sarl $2, %esi
-; X86_WIDEN-NEXT: addl %eax, %esi
-; X86_WIDEN-NEXT: leal (,%esi,8), %eax
-; X86_WIDEN-NEXT: subl %eax, %esi
-; X86_WIDEN-NEXT: addl %edi, %esi
+; X86_WIDEN-NEXT: sarl $2, %edx
+; X86_WIDEN-NEXT: addl %eax, %edx
+; X86_WIDEN-NEXT: leal (,%edx,8), %eax
+; X86_WIDEN-NEXT: subl %eax, %edx
+; X86_WIDEN-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86_WIDEN-NEXT: addl %esi, %edx
+; X86_WIDEN-NEXT: movd %edx, %xmm0
; X86_WIDEN-NEXT: movl %ecx, %eax
-; X86_WIDEN-NEXT: imull %ebp
+; X86_WIDEN-NEXT: imull %edi
; X86_WIDEN-NEXT: addl %ecx, %edx
; X86_WIDEN-NEXT: movl %edx, %eax
; X86_WIDEN-NEXT: shrl $31, %eax
@@ -551,12 +555,12 @@ define void @test_srem7_v2i32(<2 x i32>*
; X86_WIDEN-NEXT: leal (,%edx,8), %eax
; X86_WIDEN-NEXT: subl %eax, %edx
; X86_WIDEN-NEXT: addl %ecx, %edx
-; X86_WIDEN-NEXT: movl %edx, 4(%ebx)
-; X86_WIDEN-NEXT: movl %esi, (%ebx)
+; X86_WIDEN-NEXT: movd %edx, %xmm1
+; X86_WIDEN-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X86_WIDEN-NEXT: movq %xmm1, (%ebx)
; X86_WIDEN-NEXT: popl %esi
; X86_WIDEN-NEXT: popl %edi
; X86_WIDEN-NEXT: popl %ebx
-; X86_WIDEN-NEXT: popl %ebp
; X86_WIDEN-NEXT: retl
%a = load <2 x i32>, <2 x i32>* %x
%b = srem <2 x i32> %a, <i32 7, i32 7>
@@ -600,9 +604,7 @@ define void @test_udiv_pow2_v2i32(<2 x i
; X86_WIDEN-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86_WIDEN-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X86_WIDEN-NEXT: psrld $3, %xmm0
-; X86_WIDEN-NEXT: movd %xmm0, (%eax)
-; X86_WIDEN-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; X86_WIDEN-NEXT: movd %xmm0, 4(%eax)
+; X86_WIDEN-NEXT: movq %xmm0, (%eax)
; X86_WIDEN-NEXT: retl
%a = load <2 x i32>, <2 x i32>* %x
%b = udiv <2 x i32> %a, <i32 8, i32 8>
@@ -645,11 +647,9 @@ define void @test_urem_pow2_v2i32(<2 x i
; X86_WIDEN: # %bb.0:
; X86_WIDEN-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86_WIDEN-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86_WIDEN-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; X86_WIDEN-NEXT: pand {{\.LCPI.*}}, %xmm0
-; X86_WIDEN-NEXT: movd %xmm0, (%eax)
-; X86_WIDEN-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; X86_WIDEN-NEXT: movd %xmm0, 4(%eax)
+; X86_WIDEN-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; X86_WIDEN-NEXT: andps {{\.LCPI.*}}, %xmm0
+; X86_WIDEN-NEXT: movlps %xmm0, (%eax)
; X86_WIDEN-NEXT: retl
%a = load <2 x i32>, <2 x i32>* %x
%b = urem <2 x i32> %a, <i32 8, i32 8>
@@ -741,9 +741,7 @@ define void @test_sdiv_pow2_v2i32(<2 x i
; X86_WIDEN-NEXT: psrld $29, %xmm1
; X86_WIDEN-NEXT: paddd %xmm0, %xmm1
; X86_WIDEN-NEXT: psrad $3, %xmm1
-; X86_WIDEN-NEXT: movd %xmm1, (%eax)
-; X86_WIDEN-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; X86_WIDEN-NEXT: movd %xmm0, 4(%eax)
+; X86_WIDEN-NEXT: movq %xmm1, (%eax)
; X86_WIDEN-NEXT: retl
%a = load <2 x i32>, <2 x i32>* %x
%b = sdiv <2 x i32> %a, <i32 8, i32 8>
@@ -787,9 +785,7 @@ define void @test_srem_pow2_v2i32(<2 x i
; X86_WIDEN-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86_WIDEN-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X86_WIDEN-NEXT: psrld $3, %xmm0
-; X86_WIDEN-NEXT: movd %xmm0, (%eax)
-; X86_WIDEN-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; X86_WIDEN-NEXT: movd %xmm0, 4(%eax)
+; X86_WIDEN-NEXT: movq %xmm0, (%eax)
; X86_WIDEN-NEXT: retl
%a = load <2 x i32>, <2 x i32>* %x
%b = udiv <2 x i32> %a, <i32 8, i32 8>
@@ -874,25 +870,27 @@ define void @test_udiv_v2i32(<2 x i32>*
;
; X86_WIDEN-LABEL: test_udiv_v2i32:
; X86_WIDEN: # %bb.0:
-; X86_WIDEN-NEXT: pushl %ebx
-; X86_WIDEN-NEXT: pushl %edi
; X86_WIDEN-NEXT: pushl %esi
-; X86_WIDEN-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86_WIDEN-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86_WIDEN-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86_WIDEN-NEXT: movl (%ecx), %eax
-; X86_WIDEN-NEXT: movl 4(%ecx), %ecx
+; X86_WIDEN-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86_WIDEN-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86_WIDEN-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; X86_WIDEN-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
+; X86_WIDEN-NEXT: movd %xmm0, %eax
+; X86_WIDEN-NEXT: movd %xmm1, %esi
; X86_WIDEN-NEXT: xorl %edx, %edx
-; X86_WIDEN-NEXT: divl (%ebx)
-; X86_WIDEN-NEXT: movl %eax, %esi
-; X86_WIDEN-NEXT: movl %ecx, %eax
+; X86_WIDEN-NEXT: divl %esi
+; X86_WIDEN-NEXT: movd %eax, %xmm2
+; X86_WIDEN-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; X86_WIDEN-NEXT: movd %xmm0, %eax
+; X86_WIDEN-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
+; X86_WIDEN-NEXT: movd %xmm1, %esi
; X86_WIDEN-NEXT: xorl %edx, %edx
-; X86_WIDEN-NEXT: divl 4(%ebx)
-; X86_WIDEN-NEXT: movl %eax, 4(%edi)
-; X86_WIDEN-NEXT: movl %esi, (%edi)
+; X86_WIDEN-NEXT: divl %esi
+; X86_WIDEN-NEXT: movd %eax, %xmm0
+; X86_WIDEN-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
+; X86_WIDEN-NEXT: movq %xmm2, (%ecx)
; X86_WIDEN-NEXT: popl %esi
-; X86_WIDEN-NEXT: popl %edi
-; X86_WIDEN-NEXT: popl %ebx
; X86_WIDEN-NEXT: retl
%a = load <2 x i32>, <2 x i32>* %x
%b = load <2 x i32>, <2 x i32>* %y
@@ -978,25 +976,27 @@ define void @test_urem_v2i32(<2 x i32>*
;
; X86_WIDEN-LABEL: test_urem_v2i32:
; X86_WIDEN: # %bb.0:
-; X86_WIDEN-NEXT: pushl %ebx
-; X86_WIDEN-NEXT: pushl %edi
; X86_WIDEN-NEXT: pushl %esi
-; X86_WIDEN-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86_WIDEN-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86_WIDEN-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86_WIDEN-NEXT: movl (%ecx), %eax
-; X86_WIDEN-NEXT: movl 4(%ecx), %ecx
+; X86_WIDEN-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86_WIDEN-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86_WIDEN-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; X86_WIDEN-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
+; X86_WIDEN-NEXT: movd %xmm0, %eax
+; X86_WIDEN-NEXT: movd %xmm1, %esi
; X86_WIDEN-NEXT: xorl %edx, %edx
-; X86_WIDEN-NEXT: divl (%ebx)
-; X86_WIDEN-NEXT: movl %edx, %esi
-; X86_WIDEN-NEXT: movl %ecx, %eax
+; X86_WIDEN-NEXT: divl %esi
+; X86_WIDEN-NEXT: movd %edx, %xmm2
+; X86_WIDEN-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; X86_WIDEN-NEXT: movd %xmm0, %eax
+; X86_WIDEN-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
+; X86_WIDEN-NEXT: movd %xmm1, %esi
; X86_WIDEN-NEXT: xorl %edx, %edx
-; X86_WIDEN-NEXT: divl 4(%ebx)
-; X86_WIDEN-NEXT: movl %edx, 4(%edi)
-; X86_WIDEN-NEXT: movl %esi, (%edi)
+; X86_WIDEN-NEXT: divl %esi
+; X86_WIDEN-NEXT: movd %edx, %xmm0
+; X86_WIDEN-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
+; X86_WIDEN-NEXT: movq %xmm2, (%ecx)
; X86_WIDEN-NEXT: popl %esi
-; X86_WIDEN-NEXT: popl %edi
-; X86_WIDEN-NEXT: popl %ebx
; X86_WIDEN-NEXT: retl
%a = load <2 x i32>, <2 x i32>* %x
%b = load <2 x i32>, <2 x i32>* %y
@@ -1085,19 +1085,26 @@ define void @test_sdiv_v2i32(<2 x i32>*
; X86_WIDEN-NEXT: pushl %ebx
; X86_WIDEN-NEXT: pushl %edi
; X86_WIDEN-NEXT: pushl %esi
-; X86_WIDEN-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86_WIDEN-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86_WIDEN-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86_WIDEN-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86_WIDEN-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86_WIDEN-NEXT: movl (%ecx), %eax
-; X86_WIDEN-NEXT: movl 4(%ecx), %ecx
+; X86_WIDEN-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; X86_WIDEN-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
+; X86_WIDEN-NEXT: movd %xmm0, %ecx
+; X86_WIDEN-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; X86_WIDEN-NEXT: movd %xmm0, %eax
+; X86_WIDEN-NEXT: movd %xmm1, %edi
+; X86_WIDEN-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
+; X86_WIDEN-NEXT: movd %xmm1, %ebx
; X86_WIDEN-NEXT: cltd
-; X86_WIDEN-NEXT: idivl (%ebx)
-; X86_WIDEN-NEXT: movl %eax, %esi
+; X86_WIDEN-NEXT: idivl %ebx
+; X86_WIDEN-NEXT: movd %eax, %xmm0
; X86_WIDEN-NEXT: movl %ecx, %eax
; X86_WIDEN-NEXT: cltd
-; X86_WIDEN-NEXT: idivl 4(%ebx)
-; X86_WIDEN-NEXT: movl %eax, 4(%edi)
-; X86_WIDEN-NEXT: movl %esi, (%edi)
+; X86_WIDEN-NEXT: idivl %edi
+; X86_WIDEN-NEXT: movd %eax, %xmm1
+; X86_WIDEN-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X86_WIDEN-NEXT: movq %xmm1, (%esi)
; X86_WIDEN-NEXT: popl %esi
; X86_WIDEN-NEXT: popl %edi
; X86_WIDEN-NEXT: popl %ebx
@@ -1189,19 +1196,26 @@ define void @test_srem_v2i32(<2 x i32>*
; X86_WIDEN-NEXT: pushl %ebx
; X86_WIDEN-NEXT: pushl %edi
; X86_WIDEN-NEXT: pushl %esi
-; X86_WIDEN-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86_WIDEN-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86_WIDEN-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86_WIDEN-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86_WIDEN-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86_WIDEN-NEXT: movl (%ecx), %eax
-; X86_WIDEN-NEXT: movl 4(%ecx), %ecx
+; X86_WIDEN-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; X86_WIDEN-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
+; X86_WIDEN-NEXT: movd %xmm0, %ecx
+; X86_WIDEN-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; X86_WIDEN-NEXT: movd %xmm0, %eax
+; X86_WIDEN-NEXT: movd %xmm1, %edi
+; X86_WIDEN-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
+; X86_WIDEN-NEXT: movd %xmm1, %ebx
; X86_WIDEN-NEXT: cltd
-; X86_WIDEN-NEXT: idivl (%ebx)
-; X86_WIDEN-NEXT: movl %eax, %esi
+; X86_WIDEN-NEXT: idivl %ebx
+; X86_WIDEN-NEXT: movd %eax, %xmm0
; X86_WIDEN-NEXT: movl %ecx, %eax
; X86_WIDEN-NEXT: cltd
-; X86_WIDEN-NEXT: idivl 4(%ebx)
-; X86_WIDEN-NEXT: movl %eax, 4(%edi)
-; X86_WIDEN-NEXT: movl %esi, (%edi)
+; X86_WIDEN-NEXT: idivl %edi
+; X86_WIDEN-NEXT: movd %eax, %xmm1
+; X86_WIDEN-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X86_WIDEN-NEXT: movq %xmm1, (%esi)
; X86_WIDEN-NEXT: popl %esi
; X86_WIDEN-NEXT: popl %edi
; X86_WIDEN-NEXT: popl %ebx
Modified: llvm/trunk/test/CodeGen/X86/widen_cast-4.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/widen_cast-4.ll?rev=346908&r1=346907&r2=346908&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/widen_cast-4.ll (original)
+++ llvm/trunk/test/CodeGen/X86/widen_cast-4.ll Wed Nov 14 15:02:09 2018
@@ -57,15 +57,13 @@ define void @update(i64* %dst_i, i64* %s
; WIDE-NEXT: movl %edx, {{[0-9]+}}(%esp)
; WIDE-NEXT: addl {{[0-9]+}}(%esp), %ecx
; WIDE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; WIDE-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
-; WIDE-NEXT: pinsrd $1, 4(%ecx,%eax,8), %xmm3
+; WIDE-NEXT: movq {{.*#+}} xmm3 = mem[0],zero
; WIDE-NEXT: psubb %xmm0, %xmm3
; WIDE-NEXT: psrlw $2, %xmm3
; WIDE-NEXT: pand %xmm1, %xmm3
; WIDE-NEXT: pxor %xmm2, %xmm3
; WIDE-NEXT: psubb %xmm2, %xmm3
-; WIDE-NEXT: pextrd $1, %xmm3, 4(%edx,%eax,8)
-; WIDE-NEXT: movd %xmm3, (%edx,%eax,8)
+; WIDE-NEXT: movq %xmm3, (%edx,%eax,8)
; WIDE-NEXT: incl (%esp)
; WIDE-NEXT: .LBB0_1: # %forcond
; WIDE-NEXT: # =>This Inner Loop Header: Depth=1