[llvm] 5ca7754 - [X86] Fold scalar_to_vector(i64 zext(x)) -> bitcast(vzext_movl(scalar_to_vector(i32 x)))
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 21 02:47:52 PDT 2022
Author: Simon Pilgrim
Date: 2022-10-21T10:40:13+01:00
New Revision: 5ca77541446d7040638b53e0ddff3f76ee005681
URL: https://github.com/llvm/llvm-project/commit/5ca77541446d7040638b53e0ddff3f76ee005681
DIFF: https://github.com/llvm/llvm-project/commit/5ca77541446d7040638b53e0ddff3f76ee005681.diff
LOG: [X86] Fold scalar_to_vector(i64 zext(x)) -> bitcast(vzext_movl(scalar_to_vector(i32 x)))
Extends the existing any-extend fold to make use of the implicit zero-extension of the movd instruction.
This also helps replace some nasty xmm->gpr->xmm traffic with an in-register shuffle pattern instead.
Noticed while looking at D130953.
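
[Editor's note] As a quick illustration of why the fold is sound (not part of the patch): on x86-64, a 32-bit movd into an xmm register already zeroes bits 32..127, so the i64 zero-extend costs nothing. A minimal C++ sketch using SSE2 intrinsics; the widen_u32 helper name is hypothetical, and the asm comments assume a compiler with this fold applied:

#include <emmintrin.h>
#include <cstdint>

// Hypothetical helper: build <2 x i64> = { zext(x), 0 }.
// Before this fold:  movl %edi, %eax ; movq %rax, %xmm0   (gpr round trip)
// After this fold:   movd %edi, %xmm0                     (movd zeroes the
//                                                          upper 96 bits)
__m128i widen_u32(uint32_t x) {
  return _mm_cvtsi64_si128((int64_t)(uint64_t)x);
}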
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/buildvec-extract.ll
llvm/test/CodeGen/X86/combine-pmuldq.ll
llvm/test/CodeGen/X86/fast-isel-bitcast-crash.ll
llvm/test/CodeGen/X86/gather-addresses.ll
llvm/test/CodeGen/X86/insertelement-var-index.ll
llvm/test/CodeGen/X86/load-scalar-as-vector.ll
llvm/test/CodeGen/X86/vec_insert-7.ll
llvm/test/CodeGen/X86/vec_set-B.ll
llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index d219f82a7a97a..4796c22f19080 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -55036,25 +55036,37 @@ static SDValue combineScalarToVector(SDNode *N, SelectionDAG &DAG) {
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Src.getOperand(0),
Src.getOperand(1));
- // Reduce v2i64 to v4i32 if we don't need the upper bits.
+ // Reduce v2i64 to v4i32 if we don't need the upper bits or are known zero.
// TODO: Move to DAGCombine/SimplifyDemandedBits?
- if (VT == MVT::v2i64 || VT == MVT::v2f64) {
- auto IsAnyExt64 = [](SDValue Op) {
- if (Op.getValueType() != MVT::i64 || !Op.hasOneUse())
+ if ((VT == MVT::v2i64 || VT == MVT::v2f64) && Src.hasOneUse()) {
+ auto IsExt64 = [&DAG](SDValue Op, bool IsZeroExt) {
+ if (Op.getValueType() != MVT::i64)
return SDValue();
- if (Op.getOpcode() == ISD::ANY_EXTEND &&
+ unsigned Opc = IsZeroExt ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND;
+ if (Op.getOpcode() == Opc &&
Op.getOperand(0).getScalarValueSizeInBits() <= 32)
return Op.getOperand(0);
+ unsigned Ext = IsZeroExt ? ISD::ZEXTLOAD : ISD::EXTLOAD;
if (auto *Ld = dyn_cast<LoadSDNode>(Op))
- if (Ld->getExtensionType() == ISD::EXTLOAD &&
+ if (Ld->getExtensionType() == Ext &&
Ld->getMemoryVT().getScalarSizeInBits() <= 32)
return Op;
+ if (IsZeroExt && DAG.MaskedValueIsZero(Op, APInt::getHighBitsSet(64, 32)))
+ return Op;
return SDValue();
};
- if (SDValue ExtSrc = IsAnyExt64(peekThroughOneUseBitcasts(Src)))
+
+ if (SDValue AnyExt = IsExt64(peekThroughOneUseBitcasts(Src), false))
return DAG.getBitcast(
VT, DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v4i32,
- DAG.getAnyExtOrTrunc(ExtSrc, DL, MVT::i32)));
+ DAG.getAnyExtOrTrunc(AnyExt, DL, MVT::i32)));
+
+ if (SDValue ZeroExt = IsExt64(peekThroughOneUseBitcasts(Src), true))
+ return DAG.getBitcast(
+ VT,
+ DAG.getNode(X86ISD::VZEXT_MOVL, DL, MVT::v4i32,
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v4i32,
+ DAG.getZExtOrTrunc(ZeroExt, DL, MVT::i32))));
}
// Combine (v2i64 (scalar_to_vector (i64 (bitconvert (mmx))))) to MOVQ2DQ.
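
[Editor's note] For readers skimming the hunk above, here is a minimal sketch (not part of the patch) of the node rewrite that the new zero-extend arm performs, written as a hypothetical standalone helper with the same shape as the code added to combineScalarToVector:

// Sketch only: given X = the i32 source of (i64 zero_extend X) feeding a
// v2i64 scalar_to_vector, build a v4i32 scalar_to_vector of X, clear the
// upper elements with X86ISD::VZEXT_MOVL (which lowers to the implicitly
// zeroing movd/movq-style moves), then bitcast back to the original type.
SDValue foldZextScalarToVector(SDValue X, const SDLoc &DL, EVT VT,
                               SelectionDAG &DAG) {
  SDValue Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v4i32,
                            DAG.getZExtOrTrunc(X, DL, MVT::i32));
  SDValue Movl = DAG.getNode(X86ISD::VZEXT_MOVL, DL, MVT::v4i32, Vec);
  return DAG.getBitcast(VT, Movl);
}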
diff --git a/llvm/test/CodeGen/X86/buildvec-extract.ll b/llvm/test/CodeGen/X86/buildvec-extract.ll
index 4826b959e39b1..ebd027b979e73 100644
--- a/llvm/test/CodeGen/X86/buildvec-extract.ll
+++ b/llvm/test/CodeGen/X86/buildvec-extract.ll
@@ -198,17 +198,25 @@ define <2 x i64> @extract0_i32_zext_insert1_i64_undef(<4 x i32> %x) {
}
define <2 x i64> @extract0_i32_zext_insert1_i64_zero(<4 x i32> %x) {
-; SSE-LABEL: extract0_i32_zext_insert1_i64_zero:
-; SSE: # %bb.0:
-; SSE-NEXT: movd %xmm0, %eax
-; SSE-NEXT: movq %rax, %xmm0
-; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
-; SSE-NEXT: retq
+; SSE2-LABEL: extract0_i32_zext_insert1_i64_zero:
+; SSE2: # %bb.0:
+; SSE2-NEXT: xorps %xmm1, %xmm1
+; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
+; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
+; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: extract0_i32_zext_insert1_i64_zero:
+; SSE41: # %bb.0:
+; SSE41-NEXT: pxor %xmm1, %xmm1
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
+; SSE41-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
+; SSE41-NEXT: retq
;
; AVX-LABEL: extract0_i32_zext_insert1_i64_zero:
; AVX: # %bb.0:
-; AVX-NEXT: vmovd %xmm0, %eax
-; AVX-NEXT: vmovq %rax, %xmm0
+; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
; AVX-NEXT: retq
%e = extractelement <4 x i32> %x, i32 0
@@ -242,24 +250,18 @@ define <2 x i64> @extract1_i32_zext_insert1_i64_undef(<4 x i32> %x) {
define <2 x i64> @extract1_i32_zext_insert1_i64_zero(<4 x i32> %x) {
; SSE2-LABEL: extract1_i32_zext_insert1_i64_zero:
; SSE2: # %bb.0:
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
-; SSE2-NEXT: movd %xmm0, %eax
-; SSE2-NEXT: movq %rax, %xmm0
+; SSE2-NEXT: psrlq $32, %xmm0
; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
; SSE2-NEXT: retq
;
; SSE41-LABEL: extract1_i32_zext_insert1_i64_zero:
; SSE41: # %bb.0:
-; SSE41-NEXT: extractps $1, %xmm0, %eax
-; SSE41-NEXT: movq %rax, %xmm0
-; SSE41-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[4,5,6,7],zero,zero,zero,zero
; SSE41-NEXT: retq
;
; AVX-LABEL: extract1_i32_zext_insert1_i64_zero:
; AVX: # %bb.0:
-; AVX-NEXT: vextractps $1, %xmm0, %eax
-; AVX-NEXT: vmovq %rax, %xmm0
-; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
+; AVX-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[4,5,6,7],zero,zero,zero,zero
; AVX-NEXT: retq
%e = extractelement <4 x i32> %x, i32 1
%z = zext i32 %e to i64
@@ -330,25 +332,15 @@ define <2 x i64> @extract3_i32_zext_insert1_i64_undef(<4 x i32> %x) {
}
define <2 x i64> @extract3_i32_zext_insert1_i64_zero(<4 x i32> %x) {
-; SSE2-LABEL: extract3_i32_zext_insert1_i64_zero:
-; SSE2: # %bb.0:
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
-; SSE2-NEXT: movd %xmm0, %eax
-; SSE2-NEXT: movq %rax, %xmm0
-; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: extract3_i32_zext_insert1_i64_zero:
-; SSE41: # %bb.0:
-; SSE41-NEXT: extractps $3, %xmm0, %eax
-; SSE41-NEXT: movq %rax, %xmm0
-; SSE41-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
-; SSE41-NEXT: retq
+; SSE-LABEL: extract3_i32_zext_insert1_i64_zero:
+; SSE: # %bb.0:
+; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
+; SSE-NEXT: retq
;
; AVX-LABEL: extract3_i32_zext_insert1_i64_zero:
; AVX: # %bb.0:
-; AVX-NEXT: vextractps $3, %xmm0, %eax
-; AVX-NEXT: vmovq %rax, %xmm0
+; AVX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
; AVX-NEXT: retq
%e = extractelement <4 x i32> %x, i32 3
@@ -538,17 +530,22 @@ define <2 x i64> @extract0_i16_zext_insert1_i64_undef(<8 x i16> %x) {
}
define <2 x i64> @extract0_i16_zext_insert1_i64_zero(<8 x i16> %x) {
-; SSE-LABEL: extract0_i16_zext_insert1_i64_zero:
-; SSE: # %bb.0:
-; SSE-NEXT: pextrw $0, %xmm0, %eax
-; SSE-NEXT: movq %rax, %xmm0
-; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
-; SSE-NEXT: retq
+; SSE2-LABEL: extract0_i16_zext_insert1_i64_zero:
+; SSE2: # %bb.0:
+; SSE2-NEXT: pextrw $0, %xmm0, %eax
+; SSE2-NEXT: movd %eax, %xmm0
+; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: extract0_i16_zext_insert1_i64_zero:
+; SSE41: # %bb.0:
+; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
+; SSE41-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
+; SSE41-NEXT: retq
;
; AVX-LABEL: extract0_i16_zext_insert1_i64_zero:
; AVX: # %bb.0:
-; AVX-NEXT: vpextrw $0, %xmm0, %eax
-; AVX-NEXT: vmovq %rax, %xmm0
+; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
; AVX-NEXT: retq
%e = extractelement <8 x i16> %x, i32 0
@@ -581,18 +578,21 @@ define <2 x i64> @extract1_i16_zext_insert1_i64_undef(<8 x i16> %x) {
}
define <2 x i64> @extract1_i16_zext_insert1_i64_zero(<8 x i16> %x) {
-; SSE-LABEL: extract1_i16_zext_insert1_i64_zero:
-; SSE: # %bb.0:
-; SSE-NEXT: pextrw $1, %xmm0, %eax
-; SSE-NEXT: movq %rax, %xmm0
-; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
-; SSE-NEXT: retq
+; SSE2-LABEL: extract1_i16_zext_insert1_i64_zero:
+; SSE2: # %bb.0:
+; SSE2-NEXT: pextrw $1, %xmm0, %eax
+; SSE2-NEXT: movd %eax, %xmm0
+; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: extract1_i16_zext_insert1_i64_zero:
+; SSE41: # %bb.0:
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[2,3],zero,zero,zero,zero,zero,zero
+; SSE41-NEXT: retq
;
; AVX-LABEL: extract1_i16_zext_insert1_i64_zero:
; AVX: # %bb.0:
-; AVX-NEXT: vpextrw $1, %xmm0, %eax
-; AVX-NEXT: vmovq %rax, %xmm0
-; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
+; AVX-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[2,3],zero,zero,zero,zero,zero,zero
; AVX-NEXT: retq
%e = extractelement <8 x i16> %x, i32 1
%z = zext i16 %e to i64
@@ -628,18 +628,21 @@ define <2 x i64> @extract2_i16_zext_insert1_i64_undef(<8 x i16> %x) {
}
define <2 x i64> @extract2_i16_zext_insert1_i64_zero(<8 x i16> %x) {
-; SSE-LABEL: extract2_i16_zext_insert1_i64_zero:
-; SSE: # %bb.0:
-; SSE-NEXT: pextrw $2, %xmm0, %eax
-; SSE-NEXT: movq %rax, %xmm0
-; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
-; SSE-NEXT: retq
+; SSE2-LABEL: extract2_i16_zext_insert1_i64_zero:
+; SSE2: # %bb.0:
+; SSE2-NEXT: pextrw $2, %xmm0, %eax
+; SSE2-NEXT: movd %eax, %xmm0
+; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: extract2_i16_zext_insert1_i64_zero:
+; SSE41: # %bb.0:
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[4,5],zero,zero,zero,zero,zero,zero
+; SSE41-NEXT: retq
;
; AVX-LABEL: extract2_i16_zext_insert1_i64_zero:
; AVX: # %bb.0:
-; AVX-NEXT: vpextrw $2, %xmm0, %eax
-; AVX-NEXT: vmovq %rax, %xmm0
-; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
+; AVX-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[4,5],zero,zero,zero,zero,zero,zero
; AVX-NEXT: retq
%e = extractelement <8 x i16> %x, i32 2
%z = zext i16 %e to i64
@@ -674,18 +677,20 @@ define <2 x i64> @extract3_i16_zext_insert1_i64_undef(<8 x i16> %x) {
}
define <2 x i64> @extract3_i16_zext_insert1_i64_zero(<8 x i16> %x) {
-; SSE-LABEL: extract3_i16_zext_insert1_i64_zero:
-; SSE: # %bb.0:
-; SSE-NEXT: pextrw $3, %xmm0, %eax
-; SSE-NEXT: movq %rax, %xmm0
-; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
-; SSE-NEXT: retq
+; SSE2-LABEL: extract3_i16_zext_insert1_i64_zero:
+; SSE2: # %bb.0:
+; SSE2-NEXT: psrlq $48, %xmm0
+; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: extract3_i16_zext_insert1_i64_zero:
+; SSE41: # %bb.0:
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[6,7],zero,zero,zero,zero,zero,zero
+; SSE41-NEXT: retq
;
; AVX-LABEL: extract3_i16_zext_insert1_i64_zero:
; AVX: # %bb.0:
-; AVX-NEXT: vpextrw $3, %xmm0, %eax
-; AVX-NEXT: vmovq %rax, %xmm0
-; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
+; AVX-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[6,7],zero,zero,zero,zero,zero,zero
; AVX-NEXT: retq
%e = extractelement <8 x i16> %x, i32 3
%z = zext i16 %e to i64
diff --git a/llvm/test/CodeGen/X86/combine-pmuldq.ll b/llvm/test/CodeGen/X86/combine-pmuldq.ll
index 806816ec8ea31..c3d23f49439dc 100644
--- a/llvm/test/CodeGen/X86/combine-pmuldq.ll
+++ b/llvm/test/CodeGen/X86/combine-pmuldq.ll
@@ -332,8 +332,7 @@ declare dso_local i32 @foo(i32, i32, i32, i32)
define <8 x i32> @PR49658_zext(ptr %ptr, i32 %mul) {
; SSE-LABEL: PR49658_zext:
; SSE: # %bb.0: # %start
-; SSE-NEXT: movl %esi, %eax
-; SSE-NEXT: movq %rax, %xmm0
+; SSE-NEXT: movd %esi, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
; SSE-NEXT: pxor %xmm0, %xmm0
; SSE-NEXT: movq $-2097152, %rax # imm = 0xFFE00000
diff --git a/llvm/test/CodeGen/X86/fast-isel-bitcast-crash.ll b/llvm/test/CodeGen/X86/fast-isel-bitcast-crash.ll
index c46b5cca558d3..716a93a5aca7d 100644
--- a/llvm/test/CodeGen/X86/fast-isel-bitcast-crash.ll
+++ b/llvm/test/CodeGen/X86/fast-isel-bitcast-crash.ll
@@ -15,8 +15,7 @@
define <8 x i16> @bitcast_crash(i32 %arg, <8 x i16> %x, i1 %c) {
; CHECK-LABEL: bitcast_crash:
; CHECK: # %bb.0: # %bb
-; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: movq %rax, %xmm1
+; CHECK-NEXT: movd %edi, %xmm1
; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
; CHECK-NEXT: testb $1, %sil
; CHECK-NEXT: je .LBB0_2
diff --git a/llvm/test/CodeGen/X86/gather-addresses.ll b/llvm/test/CodeGen/X86/gather-addresses.ll
index 4c4e4b2dae577..a67ce8f0be5b0 100644
--- a/llvm/test/CodeGen/X86/gather-addresses.ll
+++ b/llvm/test/CodeGen/X86/gather-addresses.ll
@@ -149,11 +149,11 @@ define <4 x i64> @old(ptr %p, ptr %i, ptr %h, i64 %f) nounwind {
; LIN-SSE2-NEXT: andl %ecx, %edx
; LIN-SSE2-NEXT: andl %ecx, %esi
; LIN-SSE2-NEXT: andl %ecx, %edi
-; LIN-SSE2-NEXT: movq %rax, %xmm0
-; LIN-SSE2-NEXT: movq %rdx, %xmm1
+; LIN-SSE2-NEXT: movd %eax, %xmm0
+; LIN-SSE2-NEXT: movd %edx, %xmm1
; LIN-SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; LIN-SSE2-NEXT: movq %rdi, %xmm2
-; LIN-SSE2-NEXT: movq %rsi, %xmm1
+; LIN-SSE2-NEXT: movd %edi, %xmm2
+; LIN-SSE2-NEXT: movd %esi, %xmm1
; LIN-SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; LIN-SSE2-NEXT: retq
;
@@ -169,11 +169,11 @@ define <4 x i64> @old(ptr %p, ptr %i, ptr %h, i64 %f) nounwind {
; LIN-SSE4-NEXT: andl %ecx, %edx
; LIN-SSE4-NEXT: andl %ecx, %esi
; LIN-SSE4-NEXT: andl %ecx, %edi
-; LIN-SSE4-NEXT: movq %rdx, %xmm1
-; LIN-SSE4-NEXT: movq %rax, %xmm0
+; LIN-SSE4-NEXT: movd %edx, %xmm1
+; LIN-SSE4-NEXT: movd %eax, %xmm0
; LIN-SSE4-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; LIN-SSE4-NEXT: movq %rdi, %xmm2
-; LIN-SSE4-NEXT: movq %rsi, %xmm1
+; LIN-SSE4-NEXT: movd %edi, %xmm2
+; LIN-SSE4-NEXT: movd %esi, %xmm1
; LIN-SSE4-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; LIN-SSE4-NEXT: retq
;
@@ -192,11 +192,11 @@ define <4 x i64> @old(ptr %p, ptr %i, ptr %h, i64 %f) nounwind {
; WIN-SSE2-NEXT: andl %r9d, %ecx
; WIN-SSE2-NEXT: andl %r9d, %edx
; WIN-SSE2-NEXT: andl %r9d, %r8d
-; WIN-SSE2-NEXT: movq %rax, %xmm0
-; WIN-SSE2-NEXT: movq %rcx, %xmm1
+; WIN-SSE2-NEXT: movd %eax, %xmm0
+; WIN-SSE2-NEXT: movd %ecx, %xmm1
; WIN-SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; WIN-SSE2-NEXT: movq %r8, %xmm2
-; WIN-SSE2-NEXT: movq %rdx, %xmm1
+; WIN-SSE2-NEXT: movd %r8d, %xmm2
+; WIN-SSE2-NEXT: movd %edx, %xmm1
; WIN-SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; WIN-SSE2-NEXT: retq
;
@@ -212,11 +212,11 @@ define <4 x i64> @old(ptr %p, ptr %i, ptr %h, i64 %f) nounwind {
; WIN-SSE4-NEXT: andl %r9d, %ecx
; WIN-SSE4-NEXT: andl %r9d, %edx
; WIN-SSE4-NEXT: andl %r9d, %r8d
-; WIN-SSE4-NEXT: movq %rcx, %xmm1
-; WIN-SSE4-NEXT: movq %rax, %xmm0
+; WIN-SSE4-NEXT: movd %ecx, %xmm1
+; WIN-SSE4-NEXT: movd %eax, %xmm0
; WIN-SSE4-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; WIN-SSE4-NEXT: movq %r8, %xmm2
-; WIN-SSE4-NEXT: movq %rdx, %xmm1
+; WIN-SSE4-NEXT: movd %r8d, %xmm2
+; WIN-SSE4-NEXT: movd %edx, %xmm1
; WIN-SSE4-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; WIN-SSE4-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/insertelement-var-index.ll b/llvm/test/CodeGen/X86/insertelement-var-index.ll
index 7e366e3b00ebe..399c160335922 100644
--- a/llvm/test/CodeGen/X86/insertelement-var-index.ll
+++ b/llvm/test/CodeGen/X86/insertelement-var-index.ll
@@ -1101,8 +1101,7 @@ define <2 x double> @arg_f64_v2f64(<2 x double> %v, double %x, i32 %y) nounwind
; SSE41: # %bb.0:
; SSE41-NEXT: movapd %xmm0, %xmm2
; SSE41-NEXT: movddup {{.*#+}} xmm1 = xmm1[0,0]
-; SSE41-NEXT: movl %edi, %eax
-; SSE41-NEXT: movq %rax, %xmm0
+; SSE41-NEXT: movd %edi, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE41-NEXT: pcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2
@@ -1112,8 +1111,7 @@ define <2 x double> @arg_f64_v2f64(<2 x double> %v, double %x, i32 %y) nounwind
; AVX1-LABEL: arg_f64_v2f64:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0]
-; AVX1-NEXT: movl %edi, %eax
-; AVX1-NEXT: vmovq %rax, %xmm2
+; AVX1-NEXT: vmovd %edi, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; AVX1-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
@@ -1458,8 +1456,7 @@ define <2 x double> @load_f64_v2f64(<2 x double> %v, ptr %p, i32 %y) nounwind {
; SSE41: # %bb.0:
; SSE41-NEXT: movapd %xmm0, %xmm1
; SSE41-NEXT: movddup {{.*#+}} xmm2 = mem[0,0]
-; SSE41-NEXT: movl %esi, %eax
-; SSE41-NEXT: movq %rax, %xmm0
+; SSE41-NEXT: movd %esi, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE41-NEXT: pcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1
@@ -1469,8 +1466,7 @@ define <2 x double> @load_f64_v2f64(<2 x double> %v, ptr %p, i32 %y) nounwind {
; AVX1-LABEL: load_f64_v2f64:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
-; AVX1-NEXT: movl %esi, %eax
-; AVX1-NEXT: vmovq %rax, %xmm2
+; AVX1-NEXT: vmovd %esi, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; AVX1-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
diff --git a/llvm/test/CodeGen/X86/load-scalar-as-vector.ll b/llvm/test/CodeGen/X86/load-scalar-as-vector.ll
index 13b07532ceea4..5ebcde3053a7b 100644
--- a/llvm/test/CodeGen/X86/load-scalar-as-vector.ll
+++ b/llvm/test/CodeGen/X86/load-scalar-as-vector.ll
@@ -255,7 +255,7 @@ define <2 x i64> @lshr_op0_constant(ptr %p) nounwind {
; SSE-NEXT: movzbl (%rdi), %ecx
; SSE-NEXT: movl $42, %eax
; SSE-NEXT: shrq %cl, %rax
-; SSE-NEXT: movq %rax, %xmm0
+; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: lshr_op0_constant:
@@ -263,7 +263,7 @@ define <2 x i64> @lshr_op0_constant(ptr %p) nounwind {
; AVX-NEXT: movzbl (%rdi), %ecx
; AVX-NEXT: movl $42, %eax
; AVX-NEXT: shrq %cl, %rax
-; AVX-NEXT: vmovq %rax, %xmm0
+; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: retq
%x = load i64, ptr %p
%b = lshr i64 42, %x
diff --git a/llvm/test/CodeGen/X86/vec_insert-7.ll b/llvm/test/CodeGen/X86/vec_insert-7.ll
index cea047453de43..8fd6e3dd94b67 100644
--- a/llvm/test/CodeGen/X86/vec_insert-7.ll
+++ b/llvm/test/CodeGen/X86/vec_insert-7.ll
@@ -14,8 +14,7 @@ define x86_mmx @mmx_movzl(x86_mmx %x) nounwind {
;
; X64-LABEL: mmx_movzl:
; X64: ## %bb.0:
-; X64-NEXT: movl $32, %eax
-; X64-NEXT: movq %rax, %xmm0
+; X64-NEXT: movaps {{.*#+}} xmm0 = [32,0,0,0]
; X64-NEXT: retq
%tmp = bitcast x86_mmx %x to <2 x i32>
%tmp3 = insertelement <2 x i32> %tmp, i32 32, i32 0
diff --git a/llvm/test/CodeGen/X86/vec_set-B.ll b/llvm/test/CodeGen/X86/vec_set-B.ll
index f4d6b64dc3ac5..0f5c853220b38 100644
--- a/llvm/test/CodeGen/X86/vec_set-B.ll
+++ b/llvm/test/CodeGen/X86/vec_set-B.ll
@@ -38,7 +38,7 @@ define <2 x i64> @test2(i64 %arg) nounwind {
; X64-LABEL: test2:
; X64: # %bb.0:
; X64-NEXT: andl $1234567, %edi # imm = 0x12D687
-; X64-NEXT: movq %rdi, %xmm0
+; X64-NEXT: movd %edi, %xmm0
; X64-NEXT: retq
%A = and i64 %arg, 1234567
%B = insertelement <2 x i64> undef, i64 %A, i32 0
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll
index b0e319a402d9f..bc6f2c7006064 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll
@@ -385,8 +385,7 @@ define <4 x float> @PR31296(ptr %in) {
;
; X64-LABEL: PR31296:
; X64: # %bb.0: # %entry
-; X64-NEXT: movl (%rdi), %eax
-; X64-NEXT: vmovq %rax, %xmm0
+; X64-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],zero,zero,mem[0]
; X64-NEXT: retq
entry: