[llvm] a14aa7d - [X86][SSE] combineExtractWithShuffle - extract(bitcast(scalar_to_vector(x))) --> x
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 22 08:12:07 PST 2020
Author: Simon Pilgrim
Date: 2020-01-22T16:11:08Z
New Revision: a14aa7dabde3e985c6ae3d89fd86c6be788c4b90
URL: https://github.com/llvm/llvm-project/commit/a14aa7dabde3e985c6ae3d89fd86c6be788c4b90
DIFF: https://github.com/llvm/llvm-project/commit/a14aa7dabde3e985c6ae3d89fd86c6be788c4b90.diff
LOG: [X86][SSE] combineExtractWithShuffle - extract(bitcast(scalar_to_vector(x))) --> x
Removes some unnecessary GPR <-> FPU register traffic.
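For context, here is a minimal IR sketch of the pattern this fold targets (a hypothetical example, not taken from the patch): an extractelement through a bitcast of a value that lowers to a SCALAR_TO_VECTOR node.

define i32 @extract_upper_half(i64 %x) {
  ; An insert into lane 0 of an undef vector lowers to a SCALAR_TO_VECTOR node.
  %v = insertelement <2 x i64> undef, i64 %x, i32 0
  ; Reinterpret the single 64-bit lane as two 32-bit lanes.
  %b = bitcast <2 x i64> %v to <4 x i32>
  ; Lane 1 is just bits 32..63 of %x.
  %e = extractelement <4 x i32> %b, i32 1
  ret i32 %e
}

With this combine the extract is resolved entirely in scalar code as a shift-and-truncate of %x (a 32-bit right shift here), so the value never round-trips through an XMM register; the pextr/movd sequences collapsing into shr instructions in the test diffs below are instances of the same rewrite.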
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll
llvm/test/CodeGen/X86/bitcast-vector-bool.ll
llvm/test/CodeGen/X86/oddsubvector.ll
llvm/test/CodeGen/X86/scalar_widen_div.ll
llvm/test/CodeGen/X86/vector-idiv-v2i32.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index cebb4e20e59f..54613776a3e4 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -37126,6 +37126,28 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
}
}
+ // Handle extract(scalar_to_vector(scalar_value)) for integers.
+ // TODO: Move to DAGCombine?
+ if (SrcBC.getOpcode() == ISD::SCALAR_TO_VECTOR && VT.isInteger() &&
+ SrcBC.getValueType().isInteger() &&
+ (SrcBC.getScalarValueSizeInBits() % SrcSVT.getSizeInBits()) == 0 &&
+ SrcBC.getScalarValueSizeInBits() ==
+ SrcBC.getOperand(0).getValueSizeInBits()) {
+ unsigned Scale = SrcBC.getScalarValueSizeInBits() / SrcSVT.getSizeInBits();
+ if (IdxC.ult(Scale)) {
+ unsigned Offset = IdxC.getZExtValue() * SrcVT.getScalarSizeInBits();
+ SDValue Scl = SrcBC.getOperand(0);
+ EVT SclVT = Scl.getValueType();
+ if (Offset) {
+ Scl = DAG.getNode(ISD::SRL, dl, SclVT, Scl,
+ DAG.getShiftAmountConstant(Offset, SclVT, dl));
+ }
+ Scl = DAG.getZExtOrTrunc(Scl, dl, SrcVT.getScalarType());
+ Scl = DAG.getZExtOrTrunc(Scl, dl, VT);
+ return Scl;
+ }
+ }
+
// Handle extract(truncate(x)) for 0'th index.
// TODO: Treat this as a faux shuffle?
// TODO: When can we use this for general indices?
diff --git a/llvm/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll b/llvm/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll
index d181af7c75e7..2fd61879c163 100644
--- a/llvm/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll
+++ b/llvm/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll
@@ -17,18 +17,18 @@ target triple = "x86_64-unknown-linux-gnu"
define i32 @main() nounwind uwtable {
; CHECK-LABEL: main:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: pextrb $1, %xmm0, %ecx
-; CHECK-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT: pextrb $1, %xmm1, %eax
+; CHECK-NEXT: movq {{.*}}(%rip), %rsi
+; CHECK-NEXT: movq {{.*}}(%rip), %rax
+; CHECK-NEXT: movq %rsi, %rdx
+; CHECK-NEXT: shrq $8, %rdx
+; CHECK-NEXT: movsbl %al, %ecx
+; CHECK-NEXT: shrq $8, %rax
; CHECK-NEXT: cbtw
-; CHECK-NEXT: pextrb $0, %xmm0, %edx
-; CHECK-NEXT: pextrb $0, %xmm1, %esi
-; CHECK-NEXT: idivb %cl
-; CHECK-NEXT: movl %eax, %ecx
-; CHECK-NEXT: movsbl %sil, %eax
; CHECK-NEXT: idivb %dl
-; CHECK-NEXT: movzbl %cl, %ecx
+; CHECK-NEXT: movl %eax, %edx
+; CHECK-NEXT: movl %ecx, %eax
+; CHECK-NEXT: idivb %sil
+; CHECK-NEXT: movzbl %dl, %ecx
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: movd %eax, %xmm0
; CHECK-NEXT: pinsrb $1, %ecx, %xmm0
diff --git a/llvm/test/CodeGen/X86/bitcast-vector-bool.ll b/llvm/test/CodeGen/X86/bitcast-vector-bool.ll
index 083d0c6f3d16..8dde976912b0 100644
--- a/llvm/test/CodeGen/X86/bitcast-vector-bool.ll
+++ b/llvm/test/CodeGen/X86/bitcast-vector-bool.ll
@@ -61,14 +61,10 @@ define i2 @bitcast_v4i32_to_v2i2(<4 x i32> %a0) nounwind {
;
; AVX12-LABEL: bitcast_v4i32_to_v2i2:
; AVX12: # %bb.0:
-; AVX12-NEXT: vmovmskps %xmm0, %eax
-; AVX12-NEXT: movl %eax, %ecx
-; AVX12-NEXT: shrl $2, %ecx
-; AVX12-NEXT: vmovd %ecx, %xmm0
-; AVX12-NEXT: andl $3, %eax
-; AVX12-NEXT: vmovd %eax, %xmm1
-; AVX12-NEXT: vpextrb $0, %xmm1, %ecx
-; AVX12-NEXT: vpextrb $0, %xmm0, %eax
+; AVX12-NEXT: vmovmskps %xmm0, %ecx
+; AVX12-NEXT: movl %ecx, %eax
+; AVX12-NEXT: shrl $2, %eax
+; AVX12-NEXT: andl $3, %ecx
; AVX12-NEXT: addb %cl, %al
; AVX12-NEXT: # kill: def $al killed $al killed $eax
; AVX12-NEXT: retq
@@ -77,15 +73,13 @@ define i2 @bitcast_v4i32_to_v2i2(<4 x i32> %a0) nounwind {
; AVX512: # %bb.0:
; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT: vpcmpgtd %xmm0, %xmm1, %k0
-; AVX512-NEXT: kmovd %k0, %eax
-; AVX512-NEXT: movzbl %al, %ecx
-; AVX512-NEXT: shrl $2, %ecx
-; AVX512-NEXT: andl $3, %ecx
-; AVX512-NEXT: vmovd %ecx, %xmm0
+; AVX512-NEXT: kmovd %k0, %ecx
+; AVX512-NEXT: movzbl %cl, %eax
+; AVX512-NEXT: shrl $2, %eax
; AVX512-NEXT: andl $3, %eax
-; AVX512-NEXT: vmovd %eax, %xmm1
-; AVX512-NEXT: vpextrb $0, %xmm1, %ecx
-; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: vpbroadcastq %rax, %xmm0
+; AVX512-NEXT: andl $3, %ecx
+; AVX512-NEXT: vpextrb $8, %xmm0, %eax
; AVX512-NEXT: addb %cl, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
@@ -116,14 +110,10 @@ define i4 @bitcast_v8i16_to_v2i4(<8 x i16> %a0) nounwind {
; AVX12-LABEL: bitcast_v8i16_to_v2i4:
; AVX12: # %bb.0:
; AVX12-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
-; AVX12-NEXT: vpmovmskb %xmm0, %eax
-; AVX12-NEXT: movzbl %al, %ecx
-; AVX12-NEXT: shrl $4, %ecx
-; AVX12-NEXT: vmovd %ecx, %xmm0
-; AVX12-NEXT: andl $15, %eax
-; AVX12-NEXT: vmovd %eax, %xmm1
-; AVX12-NEXT: vpextrb $0, %xmm1, %ecx
-; AVX12-NEXT: vpextrb $0, %xmm0, %eax
+; AVX12-NEXT: vpmovmskb %xmm0, %ecx
+; AVX12-NEXT: movzbl %cl, %eax
+; AVX12-NEXT: shrl $4, %eax
+; AVX12-NEXT: andl $15, %ecx
; AVX12-NEXT: addb %cl, %al
; AVX12-NEXT: # kill: def $al killed $al killed $eax
; AVX12-NEXT: retq
@@ -131,14 +121,12 @@ define i4 @bitcast_v8i16_to_v2i4(<8 x i16> %a0) nounwind {
; AVX512-LABEL: bitcast_v8i16_to_v2i4:
; AVX512: # %bb.0:
; AVX512-NEXT: vpmovw2m %xmm0, %k0
-; AVX512-NEXT: kmovd %k0, %eax
-; AVX512-NEXT: movzbl %al, %ecx
-; AVX512-NEXT: shrl $4, %ecx
-; AVX512-NEXT: vmovd %ecx, %xmm0
-; AVX512-NEXT: andl $15, %eax
-; AVX512-NEXT: vmovd %eax, %xmm1
-; AVX512-NEXT: vpextrb $0, %xmm1, %ecx
-; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: kmovd %k0, %ecx
+; AVX512-NEXT: movzbl %cl, %eax
+; AVX512-NEXT: shrl $4, %eax
+; AVX512-NEXT: vpbroadcastq %rax, %xmm0
+; AVX512-NEXT: andl $15, %ecx
+; AVX512-NEXT: vpextrb $8, %xmm0, %eax
; AVX512-NEXT: addb %cl, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: retq
@@ -162,10 +150,9 @@ define i8 @bitcast_v16i8_to_v2i8(<16 x i8> %a0) nounwind {
;
; AVX12-LABEL: bitcast_v16i8_to_v2i8:
; AVX12: # %bb.0:
-; AVX12-NEXT: vpmovmskb %xmm0, %eax
-; AVX12-NEXT: vmovd %eax, %xmm0
-; AVX12-NEXT: vpextrb $0, %xmm0, %ecx
-; AVX12-NEXT: vpextrb $1, %xmm0, %eax
+; AVX12-NEXT: vpmovmskb %xmm0, %ecx
+; AVX12-NEXT: movl %ecx, %eax
+; AVX12-NEXT: shrl $8, %eax
; AVX12-NEXT: addb %cl, %al
; AVX12-NEXT: # kill: def $al killed $al killed $eax
; AVX12-NEXT: retq
@@ -210,14 +197,10 @@ define i2 @bitcast_v4i64_to_v2i2(<4 x i64> %a0) nounwind {
;
; AVX12-LABEL: bitcast_v4i64_to_v2i2:
; AVX12: # %bb.0:
-; AVX12-NEXT: vmovmskpd %ymm0, %eax
-; AVX12-NEXT: movl %eax, %ecx
-; AVX12-NEXT: shrl $2, %ecx
-; AVX12-NEXT: vmovd %ecx, %xmm0
-; AVX12-NEXT: andl $3, %eax
-; AVX12-NEXT: vmovd %eax, %xmm1
-; AVX12-NEXT: vpextrb $0, %xmm1, %ecx
-; AVX12-NEXT: vpextrb $0, %xmm0, %eax
+; AVX12-NEXT: vmovmskpd %ymm0, %ecx
+; AVX12-NEXT: movl %ecx, %eax
+; AVX12-NEXT: shrl $2, %eax
+; AVX12-NEXT: andl $3, %ecx
; AVX12-NEXT: addb %cl, %al
; AVX12-NEXT: # kill: def $al killed $al killed $eax
; AVX12-NEXT: vzeroupper
@@ -227,15 +210,13 @@ define i2 @bitcast_v4i64_to_v2i2(<4 x i64> %a0) nounwind {
; AVX512: # %bb.0:
; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT: vpcmpgtq %ymm0, %ymm1, %k0
-; AVX512-NEXT: kmovd %k0, %eax
-; AVX512-NEXT: movzbl %al, %ecx
-; AVX512-NEXT: shrl $2, %ecx
-; AVX512-NEXT: andl $3, %ecx
-; AVX512-NEXT: vmovd %ecx, %xmm0
+; AVX512-NEXT: kmovd %k0, %ecx
+; AVX512-NEXT: movzbl %cl, %eax
+; AVX512-NEXT: shrl $2, %eax
; AVX512-NEXT: andl $3, %eax
-; AVX512-NEXT: vmovd %eax, %xmm1
-; AVX512-NEXT: vpextrb $0, %xmm1, %ecx
-; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: vpbroadcastq %rax, %xmm0
+; AVX512-NEXT: andl $3, %ecx
+; AVX512-NEXT: vpextrb $8, %xmm0, %eax
; AVX512-NEXT: addb %cl, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
@@ -267,14 +248,10 @@ define i4 @bitcast_v8i32_to_v2i4(<8 x i32> %a0) nounwind {
;
; AVX12-LABEL: bitcast_v8i32_to_v2i4:
; AVX12: # %bb.0:
-; AVX12-NEXT: vmovmskps %ymm0, %eax
-; AVX12-NEXT: movl %eax, %ecx
-; AVX12-NEXT: shrl $4, %ecx
-; AVX12-NEXT: vmovd %ecx, %xmm0
-; AVX12-NEXT: andl $15, %eax
-; AVX12-NEXT: vmovd %eax, %xmm1
-; AVX12-NEXT: vpextrb $0, %xmm1, %ecx
-; AVX12-NEXT: vpextrb $0, %xmm0, %eax
+; AVX12-NEXT: vmovmskps %ymm0, %ecx
+; AVX12-NEXT: movl %ecx, %eax
+; AVX12-NEXT: shrl $4, %eax
+; AVX12-NEXT: andl $15, %ecx
; AVX12-NEXT: addb %cl, %al
; AVX12-NEXT: # kill: def $al killed $al killed $eax
; AVX12-NEXT: vzeroupper
@@ -284,14 +261,12 @@ define i4 @bitcast_v8i32_to_v2i4(<8 x i32> %a0) nounwind {
; AVX512: # %bb.0:
; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT: vpcmpgtd %ymm0, %ymm1, %k0
-; AVX512-NEXT: kmovd %k0, %eax
-; AVX512-NEXT: movzbl %al, %ecx
-; AVX512-NEXT: shrl $4, %ecx
-; AVX512-NEXT: vmovd %ecx, %xmm0
-; AVX512-NEXT: andl $15, %eax
-; AVX512-NEXT: vmovd %eax, %xmm1
-; AVX512-NEXT: vpextrb $0, %xmm1, %ecx
-; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: kmovd %k0, %ecx
+; AVX512-NEXT: movzbl %cl, %eax
+; AVX512-NEXT: shrl $4, %eax
+; AVX512-NEXT: vpbroadcastq %rax, %xmm0
+; AVX512-NEXT: andl $15, %ecx
+; AVX512-NEXT: vpextrb $8, %xmm0, %eax
; AVX512-NEXT: addb %cl, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
@@ -319,10 +294,9 @@ define i8 @bitcast_v16i16_to_v2i8(<16 x i16> %a0) nounwind {
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpmovmskb %xmm0, %eax
-; AVX1-NEXT: vmovd %eax, %xmm0
-; AVX1-NEXT: vpextrb $0, %xmm0, %ecx
-; AVX1-NEXT: vpextrb $1, %xmm0, %eax
+; AVX1-NEXT: vpmovmskb %xmm0, %ecx
+; AVX1-NEXT: movl %ecx, %eax
+; AVX1-NEXT: shrl $8, %eax
; AVX1-NEXT: addb %cl, %al
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
@@ -334,10 +308,9 @@ define i8 @bitcast_v16i16_to_v2i8(<16 x i16> %a0) nounwind {
; AVX2-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpmovmskb %xmm0, %eax
-; AVX2-NEXT: vmovd %eax, %xmm0
-; AVX2-NEXT: vpextrb $0, %xmm0, %ecx
-; AVX2-NEXT: vpextrb $1, %xmm0, %eax
+; AVX2-NEXT: vpmovmskb %xmm0, %ecx
+; AVX2-NEXT: movl %ecx, %eax
+; AVX2-NEXT: shrl $8, %eax
; AVX2-NEXT: addb %cl, %al
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
@@ -365,23 +338,17 @@ define i8 @bitcast_v16i16_to_v2i8(<16 x i16> %a0) nounwind {
define i16 @bitcast_v32i8_to_v2i16(<32 x i8> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v32i8_to_v2i16:
; SSE2-SSSE3: # %bb.0:
-; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %ecx
-; SSE2-SSSE3-NEXT: pmovmskb %xmm1, %eax
-; SSE2-SSSE3-NEXT: shll $16, %eax
-; SSE2-SSSE3-NEXT: movd %eax, %xmm0
-; SSE2-SSSE3-NEXT: pextrw $1, %xmm0, %eax
+; SSE2-SSSE3-NEXT: pmovmskb %xmm1, %ecx
+; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT: addl %ecx, %eax
; SSE2-SSSE3-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: bitcast_v32i8_to_v2i16:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpmovmskb %xmm0, %ecx
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpmovmskb %xmm1, %ecx
; AVX1-NEXT: vpmovmskb %xmm0, %eax
-; AVX1-NEXT: shll $16, %eax
-; AVX1-NEXT: vmovd %eax, %xmm0
-; AVX1-NEXT: vpextrw $1, %xmm0, %eax
; AVX1-NEXT: addl %ecx, %eax
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
@@ -390,8 +357,8 @@ define i16 @bitcast_v32i8_to_v2i16(<32 x i8> %a0) nounwind {
; AVX2-LABEL: bitcast_v32i8_to_v2i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmovmskb %ymm0, %ecx
-; AVX2-NEXT: vmovd %ecx, %xmm0
-; AVX2-NEXT: vpextrw $1, %xmm0, %eax
+; AVX2-NEXT: movl %ecx, %eax
+; AVX2-NEXT: shrl $16, %eax
; AVX2-NEXT: addl %ecx, %eax
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
@@ -455,14 +422,10 @@ define i4 @bitcast_v8i64_to_v2i4(<8 x i64> %a0) nounwind {
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vmovmskps %ymm0, %eax
-; AVX1-NEXT: movl %eax, %ecx
-; AVX1-NEXT: shrl $4, %ecx
-; AVX1-NEXT: vmovd %ecx, %xmm0
-; AVX1-NEXT: andl $15, %eax
-; AVX1-NEXT: vmovd %eax, %xmm1
-; AVX1-NEXT: vpextrb $0, %xmm1, %ecx
-; AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; AVX1-NEXT: vmovmskps %ymm0, %ecx
+; AVX1-NEXT: movl %ecx, %eax
+; AVX1-NEXT: shrl $4, %eax
+; AVX1-NEXT: andl $15, %ecx
; AVX1-NEXT: addb %cl, %al
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
@@ -472,14 +435,10 @@ define i4 @bitcast_v8i64_to_v2i4(<8 x i64> %a0) nounwind {
; AVX2: # %bb.0:
; AVX2-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
-; AVX2-NEXT: vmovmskps %ymm0, %eax
-; AVX2-NEXT: movl %eax, %ecx
-; AVX2-NEXT: shrl $4, %ecx
-; AVX2-NEXT: vmovd %ecx, %xmm0
-; AVX2-NEXT: andl $15, %eax
-; AVX2-NEXT: vmovd %eax, %xmm1
-; AVX2-NEXT: vpextrb $0, %xmm1, %ecx
-; AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; AVX2-NEXT: vmovmskps %ymm0, %ecx
+; AVX2-NEXT: movl %ecx, %eax
+; AVX2-NEXT: shrl $4, %eax
+; AVX2-NEXT: andl $15, %ecx
; AVX2-NEXT: addb %cl, %al
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
@@ -489,14 +448,12 @@ define i4 @bitcast_v8i64_to_v2i4(<8 x i64> %a0) nounwind {
; AVX512: # %bb.0:
; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT: vpcmpgtq %zmm0, %zmm1, %k0
-; AVX512-NEXT: kmovd %k0, %eax
-; AVX512-NEXT: movzbl %al, %ecx
-; AVX512-NEXT: shrl $4, %ecx
-; AVX512-NEXT: vmovd %ecx, %xmm0
-; AVX512-NEXT: andl $15, %eax
-; AVX512-NEXT: vmovd %eax, %xmm1
-; AVX512-NEXT: vpextrb $0, %xmm1, %ecx
-; AVX512-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512-NEXT: kmovd %k0, %ecx
+; AVX512-NEXT: movzbl %cl, %eax
+; AVX512-NEXT: shrl $4, %eax
+; AVX512-NEXT: vpbroadcastq %rax, %xmm0
+; AVX512-NEXT: andl $15, %ecx
+; AVX512-NEXT: vpextrb $8, %xmm0, %eax
; AVX512-NEXT: addb %cl, %al
; AVX512-NEXT: # kill: def $al killed $al killed $eax
; AVX512-NEXT: vzeroupper
@@ -529,10 +486,9 @@ define i8 @bitcast_v16i32_to_v2i8(<16 x i32> %a0) nounwind {
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpmovmskb %xmm0, %eax
-; AVX1-NEXT: vmovd %eax, %xmm0
-; AVX1-NEXT: vpextrb $0, %xmm0, %ecx
-; AVX1-NEXT: vpextrb $1, %xmm0, %eax
+; AVX1-NEXT: vpmovmskb %xmm0, %ecx
+; AVX1-NEXT: movl %ecx, %eax
+; AVX1-NEXT: shrl $8, %eax
; AVX1-NEXT: addb %cl, %al
; AVX1-NEXT: # kill: def $al killed $al killed $eax
; AVX1-NEXT: vzeroupper
@@ -547,10 +503,9 @@ define i8 @bitcast_v16i32_to_v2i8(<16 x i32> %a0) nounwind {
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpmovmskb %xmm0, %eax
-; AVX2-NEXT: vmovd %eax, %xmm0
-; AVX2-NEXT: vpextrb $0, %xmm0, %ecx
-; AVX2-NEXT: vpextrb $1, %xmm0, %eax
+; AVX2-NEXT: vpmovmskb %xmm0, %ecx
+; AVX2-NEXT: movl %ecx, %eax
+; AVX2-NEXT: shrl $8, %eax
; AVX2-NEXT: addb %cl, %al
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
@@ -579,28 +534,22 @@ define i8 @bitcast_v16i32_to_v2i8(<16 x i32> %a0) nounwind {
define i16 @bitcast_v32i16_to_v2i16(<32 x i16> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v32i16_to_v2i16:
; SSE2-SSSE3: # %bb.0:
-; SSE2-SSSE3-NEXT: packsswb %xmm1, %xmm0
-; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %ecx
; SSE2-SSSE3-NEXT: packsswb %xmm3, %xmm2
-; SSE2-SSSE3-NEXT: pmovmskb %xmm2, %eax
-; SSE2-SSSE3-NEXT: shll $16, %eax
-; SSE2-SSSE3-NEXT: movd %eax, %xmm0
-; SSE2-SSSE3-NEXT: pextrw $1, %xmm0, %eax
+; SSE2-SSSE3-NEXT: pmovmskb %xmm2, %ecx
+; SSE2-SSSE3-NEXT: packsswb %xmm1, %xmm0
+; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT: addl %ecx, %eax
; SSE2-SSSE3-NEXT: # kill: def $ax killed $ax killed $eax
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: bitcast_v32i16_to_v2i16:
; AVX1: # %bb.0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpmovmskb %xmm0, %ecx
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
-; AVX1-NEXT: vpacksswb %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpmovmskb %xmm1, %ecx
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
-; AVX1-NEXT: shll $16, %eax
-; AVX1-NEXT: vmovd %eax, %xmm0
-; AVX1-NEXT: vpextrw $1, %xmm0, %eax
; AVX1-NEXT: addl %ecx, %eax
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
@@ -611,8 +560,8 @@ define i16 @bitcast_v32i16_to_v2i16(<32 x i16> %a0) nounwind {
; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT: vpmovmskb %ymm0, %ecx
-; AVX2-NEXT: vmovd %ecx, %xmm0
-; AVX2-NEXT: vpextrw $1, %xmm0, %eax
+; AVX2-NEXT: movl %ecx, %eax
+; AVX2-NEXT: shrl $16, %eax
; AVX2-NEXT: addl %ecx, %eax
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
diff --git a/llvm/test/CodeGen/X86/oddsubvector.ll b/llvm/test/CodeGen/X86/oddsubvector.ll
index e4cc9b84773b..674c79de4c73 100644
--- a/llvm/test/CodeGen/X86/oddsubvector.ll
+++ b/llvm/test/CodeGen/X86/oddsubvector.ll
@@ -9,65 +9,73 @@
; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+xop | FileCheck %s --check-prefixes=AVX,XOP
define void @insert_v7i8_v2i16_2(<7 x i8> *%a0, <2 x i16> *%a1) nounwind {
-; SSE2-LABEL: insert_v7i8_v2i16_2:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
-; SSE2-NEXT: pextrw $3, %xmm1, %eax
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSE2-NEXT: movd %xmm1, (%rdi)
-; SSE2-NEXT: movb %al, 6(%rdi)
-; SSE2-NEXT: pextrw $1, %xmm0, %eax
-; SSE2-NEXT: movw %ax, 4(%rdi)
-; SSE2-NEXT: retq
-;
-; SSE42-LABEL: insert_v7i8_v2i16_2:
-; SSE42: # %bb.0:
-; SSE42-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE42-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
-; SSE42-NEXT: pextrb $6, %xmm1, 6(%rdi)
-; SSE42-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSE42-NEXT: pextrw $1, %xmm0, 4(%rdi)
-; SSE42-NEXT: movd %xmm1, (%rdi)
-; SSE42-NEXT: retq
+; SSE-LABEL: insert_v7i8_v2i16_2:
+; SSE: # %bb.0:
+; SSE-NEXT: movl (%rsi), %eax
+; SSE-NEXT: movd %eax, %xmm0
+; SSE-NEXT: movq (%rdi), %rcx
+; SSE-NEXT: movq %rcx, %xmm1
+; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSE-NEXT: shrq $48, %rcx
+; SSE-NEXT: movb %cl, 6(%rdi)
+; SSE-NEXT: shrl $16, %eax
+; SSE-NEXT: movw %ax, 4(%rdi)
+; SSE-NEXT: movd %xmm1, (%rdi)
+; SSE-NEXT: retq
;
; AVX1-LABEL: insert_v7i8_v2i16_2:
; AVX1: # %bb.0:
-; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
-; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; AVX1-NEXT: vpextrb $6, %xmm1, 6(%rdi)
-; AVX1-NEXT: vpextrw $1, %xmm0, 4(%rdi)
-; AVX1-NEXT: vmovd %xmm2, (%rdi)
+; AVX1-NEXT: movl (%rsi), %eax
+; AVX1-NEXT: vmovd %eax, %xmm0
+; AVX1-NEXT: movq (%rdi), %rcx
+; AVX1-NEXT: vmovq %rcx, %xmm1
+; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; AVX1-NEXT: shrq $48, %rcx
+; AVX1-NEXT: movb %cl, 6(%rdi)
+; AVX1-NEXT: shrl $16, %eax
+; AVX1-NEXT: movw %ax, 4(%rdi)
+; AVX1-NEXT: vmovd %xmm0, (%rdi)
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_v7i8_v2i16_2:
; AVX2: # %bb.0:
-; AVX2-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
-; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; AVX2-NEXT: vpextrb $6, %xmm1, 6(%rdi)
-; AVX2-NEXT: vpextrw $1, %xmm0, 4(%rdi)
-; AVX2-NEXT: vmovd %xmm2, (%rdi)
+; AVX2-NEXT: movl (%rsi), %eax
+; AVX2-NEXT: vmovd %eax, %xmm0
+; AVX2-NEXT: movq (%rdi), %rcx
+; AVX2-NEXT: vmovq %rcx, %xmm1
+; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; AVX2-NEXT: shrq $48, %rcx
+; AVX2-NEXT: movb %cl, 6(%rdi)
+; AVX2-NEXT: shrl $16, %eax
+; AVX2-NEXT: movw %ax, 4(%rdi)
+; AVX2-NEXT: vmovd %xmm0, (%rdi)
; AVX2-NEXT: retq
;
; AVX512-LABEL: insert_v7i8_v2i16_2:
; AVX512: # %bb.0:
-; AVX512-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX512-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
-; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; AVX512-NEXT: vpextrb $6, %xmm1, 6(%rdi)
-; AVX512-NEXT: vpextrw $1, %xmm0, 4(%rdi)
-; AVX512-NEXT: vmovd %xmm2, (%rdi)
+; AVX512-NEXT: movl (%rsi), %eax
+; AVX512-NEXT: vmovd %eax, %xmm0
+; AVX512-NEXT: movq (%rdi), %rcx
+; AVX512-NEXT: vmovq %rcx, %xmm1
+; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; AVX512-NEXT: shrq $48, %rcx
+; AVX512-NEXT: movb %cl, 6(%rdi)
+; AVX512-NEXT: shrl $16, %eax
+; AVX512-NEXT: movw %ax, 4(%rdi)
+; AVX512-NEXT: vmovd %xmm0, (%rdi)
; AVX512-NEXT: retq
;
; XOP-LABEL: insert_v7i8_v2i16_2:
; XOP: # %bb.0:
-; XOP-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; XOP-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
-; XOP-NEXT: vpextrb $6, %xmm1, 6(%rdi)
+; XOP-NEXT: movl (%rsi), %eax
+; XOP-NEXT: vmovd %eax, %xmm0
+; XOP-NEXT: movq (%rdi), %rcx
+; XOP-NEXT: vmovq %rcx, %xmm1
; XOP-NEXT: insertq {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,1,2,3],xmm1[6,7,u,u,u,u,u,u,u,u]
-; XOP-NEXT: vpextrw $1, %xmm0, 4(%rdi)
+; XOP-NEXT: shrq $48, %rcx
+; XOP-NEXT: movb %cl, 6(%rdi)
+; XOP-NEXT: shrl $16, %eax
+; XOP-NEXT: movw %ax, 4(%rdi)
; XOP-NEXT: vmovd %xmm1, (%rdi)
; XOP-NEXT: retq
%1 = load <2 x i16>, <2 x i16> *%a1
diff --git a/llvm/test/CodeGen/X86/scalar_widen_div.ll b/llvm/test/CodeGen/X86/scalar_widen_div.ll
index c6deb686e961..f658df20990b 100644
--- a/llvm/test/CodeGen/X86/scalar_widen_div.ll
+++ b/llvm/test/CodeGen/X86/scalar_widen_div.ll
@@ -13,19 +13,21 @@ define void @vectorDiv (<2 x i32> addrspace(1)* %nsource, <2 x i32> addrspace(1)
; CHECK-NEXT: movq %rsi, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: movslq -{{[0-9]+}}(%rsp), %rcx
-; CHECK-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT: pextrd $1, %xmm0, %eax
-; CHECK-NEXT: pextrd $1, %xmm1, %esi
+; CHECK-NEXT: movq (%rdi,%rcx,8), %rdi
+; CHECK-NEXT: movq (%rsi,%rcx,8), %r10
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: shrq $32, %rax
+; CHECK-NEXT: movq %r10, %rsi
+; CHECK-NEXT: shrq $32, %rsi
+; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
; CHECK-NEXT: cltd
; CHECK-NEXT: idivl %esi
-; CHECK-NEXT: movl %eax, %esi
-; CHECK-NEXT: movd %xmm0, %eax
-; CHECK-NEXT: movd %xmm1, %edi
+; CHECK-NEXT: movl %eax, %r9d
+; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: cltd
-; CHECK-NEXT: idivl %edi
+; CHECK-NEXT: idivl %r10d
; CHECK-NEXT: movd %eax, %xmm0
-; CHECK-NEXT: pinsrd $1, %esi, %xmm0
+; CHECK-NEXT: pinsrd $1, %r9d, %xmm0
; CHECK-NEXT: movq %xmm0, (%r8,%rcx,8)
; CHECK-NEXT: retq
entry:
diff --git a/llvm/test/CodeGen/X86/vector-idiv-v2i32.ll b/llvm/test/CodeGen/X86/vector-idiv-v2i32.ll
index 6e6dd6982993..b3accf80744f 100644
--- a/llvm/test/CodeGen/X86/vector-idiv-v2i32.ll
+++ b/llvm/test/CodeGen/X86/vector-idiv-v2i32.ll
@@ -324,10 +324,11 @@ define void @test_udiv_v2i32(<2 x i32>* %x, <2 x i32>* %y, <2 x i32>* %z) nounwi
; X64-LABEL: test_udiv_v2i32:
; X64: # %bb.0:
; X64-NEXT: movq %rdx, %rcx
-; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; X64-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
-; X64-NEXT: movd %xmm0, %eax
-; X64-NEXT: movd %xmm1, %esi
+; X64-NEXT: movq (%rdi), %rax
+; X64-NEXT: movq %rax, %xmm0
+; X64-NEXT: movq (%rsi), %rsi
+; X64-NEXT: movq %rsi, %xmm1
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: divl %esi
; X64-NEXT: movd %eax, %xmm2
@@ -377,10 +378,11 @@ define void @test_urem_v2i32(<2 x i32>* %x, <2 x i32>* %y, <2 x i32>* %z) nounwi
; X64-LABEL: test_urem_v2i32:
; X64: # %bb.0:
; X64-NEXT: movq %rdx, %rcx
-; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; X64-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
-; X64-NEXT: movd %xmm0, %eax
-; X64-NEXT: movd %xmm1, %esi
+; X64-NEXT: movq (%rdi), %rax
+; X64-NEXT: movq %rax, %xmm0
+; X64-NEXT: movq (%rsi), %rsi
+; X64-NEXT: movq %rsi, %xmm1
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: divl %esi
; X64-NEXT: movd %edx, %xmm2
@@ -430,10 +432,11 @@ define void @test_sdiv_v2i32(<2 x i32>* %x, <2 x i32>* %y, <2 x i32>* %z) nounwi
; X64-LABEL: test_sdiv_v2i32:
; X64: # %bb.0:
; X64-NEXT: movq %rdx, %rcx
-; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; X64-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
-; X64-NEXT: movd %xmm0, %eax
-; X64-NEXT: movd %xmm1, %esi
+; X64-NEXT: movq (%rdi), %rax
+; X64-NEXT: movq %rax, %xmm0
+; X64-NEXT: movq (%rsi), %rsi
+; X64-NEXT: movq %rsi, %xmm1
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: cltd
; X64-NEXT: idivl %esi
; X64-NEXT: movd %eax, %xmm2
@@ -488,10 +491,11 @@ define void @test_srem_v2i32(<2 x i32>* %x, <2 x i32>* %y, <2 x i32>* %z) nounwi
; X64-LABEL: test_srem_v2i32:
; X64: # %bb.0:
; X64-NEXT: movq %rdx, %rcx
-; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; X64-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
-; X64-NEXT: movd %xmm0, %eax
-; X64-NEXT: movd %xmm1, %esi
+; X64-NEXT: movq (%rdi), %rax
+; X64-NEXT: movq %rax, %xmm0
+; X64-NEXT: movq (%rsi), %rsi
+; X64-NEXT: movq %rsi, %xmm1
+; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: cltd
; X64-NEXT: idivl %esi
; X64-NEXT: movd %eax, %xmm2