[llvm] d81d451 - [X86] Add DAG combine to replace vXi64 vzext_movl+scalar_to_vector with vYi32 vzext_movl+scalar_to_vector if the upper 32 bits of the scalar are zero.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sat Mar 7 16:38:07 PST 2020
Author: Craig Topper
Date: 2020-03-07T16:14:26-08:00
New Revision: d81d451442d758dcd4f8d59162cf46c8386bef2d
URL: https://github.com/llvm/llvm-project/commit/d81d451442d758dcd4f8d59162cf46c8386bef2d
DIFF: https://github.com/llvm/llvm-project/commit/d81d451442d758dcd4f8d59162cf46c8386bef2d.diff
LOG: [X86] Add DAG combine to replace vXi64 vzext_movl+scalar_to_vector with vYi32 vzext_movl+scalar_to_vector if the upper 32 bits of the scalar are zero.
When the upper 32 bits of the scalar are known to be zero, we can just use
a 32-bit copy-and-zero in the SSE domain instead of a 64-bit copy.
Also remove an isel pattern that becomes dead with this change.
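For intuition, the scalar-level equivalence this combine relies on can be sketched
with SSE2 intrinsics (an illustrative example, not part of the patch; the sample
value and the choice of _mm_cvtsi32_si128/_mm_cvtsi64_si128 are just for
demonstration): when the upper 32 bits of the i64 are zero, a 32-bit copy-and-zero
(movd) fills the vector with exactly the same contents as a 64-bit copy (movq).

// Illustrative sketch only: movd vs. movq when the upper 32 bits are zero.
#include <emmintrin.h>
#include <cstdint>
#include <cstdio>

int main() {
  uint64_t X = 0x00000000DEADBEEFull;             // upper 32 bits known zero
  __m128i MovQ = _mm_cvtsi64_si128((long long)X); // movq: 64-bit copy, upper lane zeroed
  __m128i MovD = _mm_cvtsi32_si128((int)X);       // movd: 32-bit copy, remaining lanes zeroed
  alignas(16) uint64_t Q[2], D[2];
  _mm_store_si128((__m128i *)Q, MovQ);
  _mm_store_si128((__m128i *)D, MovD);
  std::printf("equal: %d\n", Q[0] == D[0] && Q[1] == D[1]); // prints "equal: 1"
  return 0;
}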
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/lib/Target/X86/X86InstrAVX512.td
llvm/lib/Target/X86/X86InstrSSE.td
llvm/test/CodeGen/X86/buildvec-extract.ll
llvm/test/CodeGen/X86/pshufb-mask-comments.ll
llvm/test/CodeGen/X86/vec_set-A.ll
llvm/test/CodeGen/X86/vec_set-B.ll
llvm/test/CodeGen/X86/vector-lzcnt-128.ll
llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll
llvm/test/CodeGen/X86/vector-tzcnt-128.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index ff3739da2dca..11f9722914db 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -35971,6 +35971,24 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
}
}
+ // Turn (v2i64 (vzext_movl (scalar_to_vector (i64 X)))) into
+ // (v2i64 (bitcast (v4i32 (vzext_movl (scalar_to_vector (i32 (trunc X)))))))
+ // if the upper bits of the i64 are zero.
+ if (N->getOpcode() == X86ISD::VZEXT_MOVL && N->getOperand(0).hasOneUse() &&
+ N->getOperand(0)->getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ N->getOperand(0).getOperand(0).hasOneUse() &&
+ N->getOperand(0).getOperand(0).getValueType() == MVT::i64) {
+ SDValue In = N->getOperand(0).getOperand(0);
+ APInt Mask = APInt::getHighBitsSet(64, 32);
+ if (DAG.MaskedValueIsZero(In, Mask)) {
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, In);
+ MVT VecVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() * 2);
+ SDValue SclVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Trunc);
+ SDValue Movl = DAG.getNode(X86ISD::VZEXT_MOVL, dl, VecVT, SclVec);
+ return DAG.getBitcast(VT, Movl);
+ }
+ }
+
return SDValue();
}
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index a080f2f7cb18..12d87511ef8c 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -4447,8 +4447,6 @@ let Predicates = [HasAVX512] in {
(VMOV64toPQIZrr GR64:$src)>;
// AVX 128-bit movd/movq instruction write zeros in the high 128-bit part.
- def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
- (VMOVDI2PDIZrm addr:$src)>;
def : Pat<(v4i32 (X86vzload32 addr:$src)),
(VMOVDI2PDIZrm addr:$src)>;
def : Pat<(v8i32 (X86vzload32 addr:$src)),
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 4c87963296d0..3f2ff254fdf9 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -4232,8 +4232,6 @@ let Predicates = [UseAVX] in {
// AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
// These instructions also write zeros in the high part of a 256-bit register.
- def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
- (VMOVDI2PDIrm addr:$src)>;
def : Pat<(v4i32 (X86vzload32 addr:$src)),
(VMOVDI2PDIrm addr:$src)>;
def : Pat<(v8i32 (X86vzload32 addr:$src)),
@@ -4246,8 +4244,6 @@ let Predicates = [UseSSE2] in {
def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
(MOV64toPQIrr GR64:$src)>;
- def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
- (MOVDI2PDIrm addr:$src)>;
def : Pat<(v4i32 (X86vzload32 addr:$src)),
(MOVDI2PDIrm addr:$src)>;
}
diff --git a/llvm/test/CodeGen/X86/buildvec-extract.ll b/llvm/test/CodeGen/X86/buildvec-extract.ll
index d1546677a69b..218701250e43 100644
--- a/llvm/test/CodeGen/X86/buildvec-extract.ll
+++ b/llvm/test/CodeGen/X86/buildvec-extract.ll
@@ -26,16 +26,23 @@ define <2 x i64> @extract0_i32_zext_insert0_i64_undef(<4 x i32> %x) {
}
define <2 x i64> @extract0_i32_zext_insert0_i64_zero(<4 x i32> %x) {
-; SSE-LABEL: extract0_i32_zext_insert0_i64_zero:
-; SSE: # %bb.0:
-; SSE-NEXT: movd %xmm0, %eax
-; SSE-NEXT: movq %rax, %xmm0
-; SSE-NEXT: retq
+; SSE2-LABEL: extract0_i32_zext_insert0_i64_zero:
+; SSE2: # %bb.0:
+; SSE2-NEXT: xorps %xmm1, %xmm1
+; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
+; SSE2-NEXT: movaps %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: extract0_i32_zext_insert0_i64_zero:
+; SSE41: # %bb.0:
+; SSE41-NEXT: xorps %xmm1, %xmm1
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; SSE41-NEXT: retq
;
; AVX-LABEL: extract0_i32_zext_insert0_i64_zero:
; AVX: # %bb.0:
-; AVX-NEXT: vmovd %xmm0, %eax
-; AVX-NEXT: vmovq %rax, %xmm0
+; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT: retq
%e = extractelement <4 x i32> %x, i32 0
%z = zext i32 %e to i64
@@ -62,21 +69,23 @@ define <2 x i64> @extract1_i32_zext_insert0_i64_undef(<4 x i32> %x) {
define <2 x i64> @extract1_i32_zext_insert0_i64_zero(<4 x i32> %x) {
; SSE2-LABEL: extract1_i32_zext_insert0_i64_zero:
; SSE2: # %bb.0:
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; SSE2-NEXT: movd %xmm0, %eax
-; SSE2-NEXT: movq %rax, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; SSE2-NEXT: pxor %xmm0, %xmm0
+; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE2-NEXT: retq
;
; SSE41-LABEL: extract1_i32_zext_insert0_i64_zero:
; SSE41: # %bb.0:
-; SSE41-NEXT: extractps $1, %xmm0, %eax
-; SSE41-NEXT: movq %rax, %xmm0
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; SSE41-NEXT: pxor %xmm0, %xmm0
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
; SSE41-NEXT: retq
;
; AVX-LABEL: extract1_i32_zext_insert0_i64_zero:
; AVX: # %bb.0:
-; AVX-NEXT: vextractps $1, %xmm0, %eax
-; AVX-NEXT: vmovq %rax, %xmm0
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT: retq
%e = extractelement <4 x i32> %x, i32 1
%z = zext i32 %e to i64
@@ -105,21 +114,23 @@ define <2 x i64> @extract2_i32_zext_insert0_i64_undef(<4 x i32> %x) {
define <2 x i64> @extract2_i32_zext_insert0_i64_zero(<4 x i32> %x) {
; SSE2-LABEL: extract2_i32_zext_insert0_i64_zero:
; SSE2: # %bb.0:
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; SSE2-NEXT: movd %xmm0, %eax
-; SSE2-NEXT: movq %rax, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE2-NEXT: pxor %xmm0, %xmm0
+; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE2-NEXT: retq
;
; SSE41-LABEL: extract2_i32_zext_insert0_i64_zero:
; SSE41: # %bb.0:
-; SSE41-NEXT: extractps $2, %xmm0, %eax
-; SSE41-NEXT: movq %rax, %xmm0
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE41-NEXT: pxor %xmm0, %xmm0
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
; SSE41-NEXT: retq
;
; AVX-LABEL: extract2_i32_zext_insert0_i64_zero:
; AVX: # %bb.0:
-; AVX-NEXT: vextractps $2, %xmm0, %eax
-; AVX-NEXT: vmovq %rax, %xmm0
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT: retq
%e = extractelement <4 x i32> %x, i32 2
%z = zext i32 %e to i64
@@ -144,23 +155,14 @@ define <2 x i64> @extract3_i32_zext_insert0_i64_undef(<4 x i32> %x) {
}
define <2 x i64> @extract3_i32_zext_insert0_i64_zero(<4 x i32> %x) {
-; SSE2-LABEL: extract3_i32_zext_insert0_i64_zero:
-; SSE2: # %bb.0:
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
-; SSE2-NEXT: movd %xmm0, %eax
-; SSE2-NEXT: movq %rax, %xmm0
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: extract3_i32_zext_insert0_i64_zero:
-; SSE41: # %bb.0:
-; SSE41-NEXT: extractps $3, %xmm0, %eax
-; SSE41-NEXT: movq %rax, %xmm0
-; SSE41-NEXT: retq
+; SSE-LABEL: extract3_i32_zext_insert0_i64_zero:
+; SSE: # %bb.0:
+; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSE-NEXT: retq
;
; AVX-LABEL: extract3_i32_zext_insert0_i64_zero:
; AVX: # %bb.0:
-; AVX-NEXT: vextractps $3, %xmm0, %eax
-; AVX-NEXT: vmovq %rax, %xmm0
+; AVX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; AVX-NEXT: retq
%e = extractelement <4 x i32> %x, i32 3
%z = zext i32 %e to i64
@@ -387,13 +389,13 @@ define <2 x i64> @extract0_i16_zext_insert0_i64_zero(<8 x i16> %x) {
; SSE-LABEL: extract0_i16_zext_insert0_i64_zero:
; SSE: # %bb.0:
; SSE-NEXT: pextrw $0, %xmm0, %eax
-; SSE-NEXT: movq %rax, %xmm0
+; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: extract0_i16_zext_insert0_i64_zero:
; AVX: # %bb.0:
; AVX-NEXT: vpextrw $0, %xmm0, %eax
-; AVX-NEXT: vmovq %rax, %xmm0
+; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: retq
%e = extractelement <8 x i16> %x, i32 0
%z = zext i16 %e to i64
@@ -423,13 +425,13 @@ define <2 x i64> @extract1_i16_zext_insert0_i64_zero(<8 x i16> %x) {
; SSE-LABEL: extract1_i16_zext_insert0_i64_zero:
; SSE: # %bb.0:
; SSE-NEXT: pextrw $1, %xmm0, %eax
-; SSE-NEXT: movq %rax, %xmm0
+; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: extract1_i16_zext_insert0_i64_zero:
; AVX: # %bb.0:
; AVX-NEXT: vpextrw $1, %xmm0, %eax
-; AVX-NEXT: vmovq %rax, %xmm0
+; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: retq
%e = extractelement <8 x i16> %x, i32 1
%z = zext i16 %e to i64
@@ -459,13 +461,13 @@ define <2 x i64> @extract2_i16_zext_insert0_i64_zero(<8 x i16> %x) {
; SSE-LABEL: extract2_i16_zext_insert0_i64_zero:
; SSE: # %bb.0:
; SSE-NEXT: pextrw $2, %xmm0, %eax
-; SSE-NEXT: movq %rax, %xmm0
+; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: extract2_i16_zext_insert0_i64_zero:
; AVX: # %bb.0:
; AVX-NEXT: vpextrw $2, %xmm0, %eax
-; AVX-NEXT: vmovq %rax, %xmm0
+; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: retq
%e = extractelement <8 x i16> %x, i32 2
%z = zext i16 %e to i64
@@ -493,13 +495,13 @@ define <2 x i64> @extract3_i16_zext_insert0_i64_zero(<8 x i16> %x) {
; SSE-LABEL: extract3_i16_zext_insert0_i64_zero:
; SSE: # %bb.0:
; SSE-NEXT: pextrw $3, %xmm0, %eax
-; SSE-NEXT: movq %rax, %xmm0
+; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: extract3_i16_zext_insert0_i64_zero:
; AVX: # %bb.0:
; AVX-NEXT: vpextrw $3, %xmm0, %eax
-; AVX-NEXT: vmovq %rax, %xmm0
+; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: retq
%e = extractelement <8 x i16> %x, i32 3
%z = zext i16 %e to i64
diff --git a/llvm/test/CodeGen/X86/pshufb-mask-comments.ll b/llvm/test/CodeGen/X86/pshufb-mask-comments.ll
index d0ed99f92f3a..72cac5bdc83e 100644
--- a/llvm/test/CodeGen/X86/pshufb-mask-comments.ll
+++ b/llvm/test/CodeGen/X86/pshufb-mask-comments.ll
@@ -55,7 +55,7 @@ define <16 x i8> @test5(<16 x i8> %V) {
; CHECK-LABEL: test5:
; CHECK: # %bb.0:
; CHECK-NEXT: movl $1, %eax
-; CHECK-NEXT: movq %rax, %xmm1
+; CHECK-NEXT: movd %eax, %xmm1
; CHECK-NEXT: movdqa %xmm1, (%rax)
; CHECK-NEXT: movaps {{.*#+}} xmm1 = [1,1]
; CHECK-NEXT: movaps %xmm1, (%rax)
diff --git a/llvm/test/CodeGen/X86/vec_set-A.ll b/llvm/test/CodeGen/X86/vec_set-A.ll
index 9c0e9388b25c..0a8b4d8a4407 100644
--- a/llvm/test/CodeGen/X86/vec_set-A.ll
+++ b/llvm/test/CodeGen/X86/vec_set-A.ll
@@ -12,7 +12,7 @@ define <2 x i64> @test1() nounwind {
; X64-LABEL: test1:
; X64: # %bb.0:
; X64-NEXT: movl $1, %eax
-; X64-NEXT: movq %rax, %xmm0
+; X64-NEXT: movd %eax, %xmm0
; X64-NEXT: retq
ret <2 x i64> < i64 1, i64 0 >
}
diff --git a/llvm/test/CodeGen/X86/vec_set-B.ll b/llvm/test/CodeGen/X86/vec_set-B.ll
index cd5ce9fbb07e..f4d6b64dc3ac 100644
--- a/llvm/test/CodeGen/X86/vec_set-B.ll
+++ b/llvm/test/CodeGen/X86/vec_set-B.ll
@@ -20,7 +20,7 @@ define <2 x i64> @test3(i64 %arg) nounwind {
; X64-LABEL: test3:
; X64: # %bb.0:
; X64-NEXT: andl $1234567, %edi # imm = 0x12D687
-; X64-NEXT: movq %rdi, %xmm0
+; X64-NEXT: movd %edi, %xmm0
; X64-NEXT: retq
%A = and i64 %arg, 1234567
%B = insertelement <2 x i64> zeroinitializer, i64 %A, i32 0
diff --git a/llvm/test/CodeGen/X86/vector-lzcnt-128.ll b/llvm/test/CodeGen/X86/vector-lzcnt-128.ll
index 4d4326f08ea4..95b1ec0fee6b 100644
--- a/llvm/test/CodeGen/X86/vector-lzcnt-128.ll
+++ b/llvm/test/CodeGen/X86/vector-lzcnt-128.ll
@@ -1667,19 +1667,19 @@ define <2 x i64> @foldv2i64() nounwind {
; SSE-LABEL: foldv2i64:
; SSE: # %bb.0:
; SSE-NEXT: movl $55, %eax
-; SSE-NEXT: movq %rax, %xmm0
+; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: retq
;
; NOBW-LABEL: foldv2i64:
; NOBW: # %bb.0:
; NOBW-NEXT: movl $55, %eax
-; NOBW-NEXT: vmovq %rax, %xmm0
+; NOBW-NEXT: vmovd %eax, %xmm0
; NOBW-NEXT: retq
;
; AVX512VLBWDQ-LABEL: foldv2i64:
; AVX512VLBWDQ: # %bb.0:
; AVX512VLBWDQ-NEXT: movl $55, %eax
-; AVX512VLBWDQ-NEXT: vmovq %rax, %xmm0
+; AVX512VLBWDQ-NEXT: vmovd %eax, %xmm0
; AVX512VLBWDQ-NEXT: retq
;
; X32-SSE-LABEL: foldv2i64:
@@ -1695,19 +1695,19 @@ define <2 x i64> @foldv2i64u() nounwind {
; SSE-LABEL: foldv2i64u:
; SSE: # %bb.0:
; SSE-NEXT: movl $55, %eax
-; SSE-NEXT: movq %rax, %xmm0
+; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: retq
;
; NOBW-LABEL: foldv2i64u:
; NOBW: # %bb.0:
; NOBW-NEXT: movl $55, %eax
-; NOBW-NEXT: vmovq %rax, %xmm0
+; NOBW-NEXT: vmovd %eax, %xmm0
; NOBW-NEXT: retq
;
; AVX512VLBWDQ-LABEL: foldv2i64u:
; AVX512VLBWDQ: # %bb.0:
; AVX512VLBWDQ-NEXT: movl $55, %eax
-; AVX512VLBWDQ-NEXT: vmovq %rax, %xmm0
+; AVX512VLBWDQ-NEXT: vmovd %eax, %xmm0
; AVX512VLBWDQ-NEXT: retq
;
; X32-SSE-LABEL: foldv2i64u:
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll b/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll
index dfe2d4db18f1..31acce98bcac 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll
@@ -140,19 +140,12 @@ define <8 x double> @shuffle_v8f64_06000000(<8 x double> %a, <8 x double> %b) {
}
define <8 x double> @shuffle_v8f64_70000000(<8 x double> %a, <8 x double> %b) {
-; AVX512F-LABEL: shuffle_v8f64_70000000:
-; AVX512F: # %bb.0:
-; AVX512F-NEXT: movl $7, %eax
-; AVX512F-NEXT: vmovq %rax, %xmm1
-; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
-; AVX512F-NEXT: retq
-;
-; AVX512F-32-LABEL: shuffle_v8f64_70000000:
-; AVX512F-32: # %bb.0:
-; AVX512F-32-NEXT: movl $7, %eax
-; AVX512F-32-NEXT: vmovd %eax, %xmm1
-; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
-; AVX512F-32-NEXT: retl
+; ALL-LABEL: shuffle_v8f64_70000000:
+; ALL: # %bb.0:
+; ALL-NEXT: movl $7, %eax
+; ALL-NEXT: vmovd %eax, %xmm1
+; ALL-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; ALL-NEXT: ret{{[l|q]}}
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <8 x double> %shuffle
}
@@ -984,19 +977,12 @@ define <8 x i64> @shuffle_v8i64_06000000(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_70000000(<8 x i64> %a, <8 x i64> %b) {
;
-; AVX512F-LABEL: shuffle_v8i64_70000000:
-; AVX512F: # %bb.0:
-; AVX512F-NEXT: movl $7, %eax
-; AVX512F-NEXT: vmovq %rax, %xmm1
-; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
-; AVX512F-NEXT: retq
-;
-; AVX512F-32-LABEL: shuffle_v8i64_70000000:
-; AVX512F-32: # %bb.0:
-; AVX512F-32-NEXT: movl $7, %eax
-; AVX512F-32-NEXT: vmovd %eax, %xmm1
-; AVX512F-32-NEXT: vpermq %zmm0, %zmm1, %zmm0
-; AVX512F-32-NEXT: retl
+; ALL-LABEL: shuffle_v8i64_70000000:
+; ALL: # %bb.0:
+; ALL-NEXT: movl $7, %eax
+; ALL-NEXT: vmovd %eax, %xmm1
+; ALL-NEXT: vpermq %zmm0, %zmm1, %zmm0
+; ALL-NEXT: ret{{[l|q]}}
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <8 x i64> %shuffle
}
diff --git a/llvm/test/CodeGen/X86/vector-tzcnt-128.ll b/llvm/test/CodeGen/X86/vector-tzcnt-128.ll
index 8c32bf86700c..8fcf2361c0cd 100644
--- a/llvm/test/CodeGen/X86/vector-tzcnt-128.ll
+++ b/llvm/test/CodeGen/X86/vector-tzcnt-128.ll
@@ -1577,37 +1577,37 @@ define <2 x i64> @foldv2i64() nounwind {
; SSE-LABEL: foldv2i64:
; SSE: # %bb.0:
; SSE-NEXT: movl $8, %eax
-; SSE-NEXT: movq %rax, %xmm0
+; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: foldv2i64:
; AVX: # %bb.0:
; AVX-NEXT: movl $8, %eax
-; AVX-NEXT: vmovq %rax, %xmm0
+; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: retq
;
; AVX512VPOPCNTDQ-LABEL: foldv2i64:
; AVX512VPOPCNTDQ: # %bb.0:
; AVX512VPOPCNTDQ-NEXT: movl $8, %eax
-; AVX512VPOPCNTDQ-NEXT: vmovq %rax, %xmm0
+; AVX512VPOPCNTDQ-NEXT: vmovd %eax, %xmm0
; AVX512VPOPCNTDQ-NEXT: retq
;
; AVX512VPOPCNTDQVL-LABEL: foldv2i64:
; AVX512VPOPCNTDQVL: # %bb.0:
; AVX512VPOPCNTDQVL-NEXT: movl $8, %eax
-; AVX512VPOPCNTDQVL-NEXT: vmovq %rax, %xmm0
+; AVX512VPOPCNTDQVL-NEXT: vmovd %eax, %xmm0
; AVX512VPOPCNTDQVL-NEXT: retq
;
; BITALG_NOVLX-LABEL: foldv2i64:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: movl $8, %eax
-; BITALG_NOVLX-NEXT: vmovq %rax, %xmm0
+; BITALG_NOVLX-NEXT: vmovd %eax, %xmm0
; BITALG_NOVLX-NEXT: retq
;
; BITALG-LABEL: foldv2i64:
; BITALG: # %bb.0:
; BITALG-NEXT: movl $8, %eax
-; BITALG-NEXT: vmovq %rax, %xmm0
+; BITALG-NEXT: vmovd %eax, %xmm0
; BITALG-NEXT: retq
;
; X32-SSE-LABEL: foldv2i64:
@@ -1623,37 +1623,37 @@ define <2 x i64> @foldv2i64u() nounwind {
; SSE-LABEL: foldv2i64u:
; SSE: # %bb.0:
; SSE-NEXT: movl $8, %eax
-; SSE-NEXT: movq %rax, %xmm0
+; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: foldv2i64u:
; AVX: # %bb.0:
; AVX-NEXT: movl $8, %eax
-; AVX-NEXT: vmovq %rax, %xmm0
+; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: retq
;
; AVX512VPOPCNTDQ-LABEL: foldv2i64u:
; AVX512VPOPCNTDQ: # %bb.0:
; AVX512VPOPCNTDQ-NEXT: movl $8, %eax
-; AVX512VPOPCNTDQ-NEXT: vmovq %rax, %xmm0
+; AVX512VPOPCNTDQ-NEXT: vmovd %eax, %xmm0
; AVX512VPOPCNTDQ-NEXT: retq
;
; AVX512VPOPCNTDQVL-LABEL: foldv2i64u:
; AVX512VPOPCNTDQVL: # %bb.0:
; AVX512VPOPCNTDQVL-NEXT: movl $8, %eax
-; AVX512VPOPCNTDQVL-NEXT: vmovq %rax, %xmm0
+; AVX512VPOPCNTDQVL-NEXT: vmovd %eax, %xmm0
; AVX512VPOPCNTDQVL-NEXT: retq
;
; BITALG_NOVLX-LABEL: foldv2i64u:
; BITALG_NOVLX: # %bb.0:
; BITALG_NOVLX-NEXT: movl $8, %eax
-; BITALG_NOVLX-NEXT: vmovq %rax, %xmm0
+; BITALG_NOVLX-NEXT: vmovd %eax, %xmm0
; BITALG_NOVLX-NEXT: retq
;
; BITALG-LABEL: foldv2i64u:
; BITALG: # %bb.0:
; BITALG-NEXT: movl $8, %eax
-; BITALG-NEXT: vmovq %rax, %xmm0
+; BITALG-NEXT: vmovd %eax, %xmm0
; BITALG-NEXT: retq
;
; X32-SSE-LABEL: foldv2i64u: