[llvm] r361706 - [X86] lowerBuildVectorToBitOp - support build_vector(shift()) -> shift(build_vector(), C)
Simon Pilgrim via llvm-commits
Author: rksimon
Date: Sat May 25 11:02:17 2019
New Revision: 361706
URL: http://llvm.org/viewvc/llvm-project?rev=361706&view=rev
Log:
[X86] lowerBuildVectorToBitOp - support build_vector(shift()) -> shift(build_vector(), C)
This pattern commonly occurs in sign-extension cases.
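
For reference, a minimal example of the kind of IR that hits this path,
reconstructed from the modified vector-sext.ll tests below: scalarized sign
extension emits a shl/sra pair per lane feeding a build_vector, and the
trailing sra by a uniform amount can now be pulled out of the build_vector
as a single vector shift.

  define <2 x i64> @load_sext_2i1_to_2i64(<2 x i1>* %ptr) {
  entry:
    ; Each i1 lane is scalarized to shl + sra; the final sra amount is
    ; the same for every lane, so it now lowers to one vector shift
    ; (psrad on SSE, vpxor + vpcmpgtq on AVX) after the vector is
    ; assembled, instead of a scalar sarq per element.
    %X = load <2 x i1>, <2 x i1>* %ptr
    %Y = sext <2 x i1> %X to <2 x i64>
    ret <2 x i64> %Y
  }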
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/rotate-extract-vector.ll
llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-128.ll
llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-256.ll
llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-512.ll
llvm/trunk/test/CodeGen/X86/vector-sext-widen.ll
llvm/trunk/test/CodeGen/X86/vector-sext.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=361706&r1=361705&r2=361706&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat May 25 11:02:17 2019
@@ -8746,9 +8746,15 @@ static SDValue lowerBuildVectorToBitOp(B
return SDValue();
// TODO: We may be able to add support for other Ops (ADD/SUB + shifts).
+ bool IsShift = false;
switch (Opcode) {
default:
return SDValue();
+ case ISD::SHL:
+ case ISD::SRL:
+ case ISD::SRA:
+ IsShift = true;
+ break;
case ISD::AND:
case ISD::XOR:
case ISD::OR:
@@ -8769,10 +8775,24 @@ static SDValue lowerBuildVectorToBitOp(B
// We expect the canonicalized RHS operand to be the constant.
if (!isa<ConstantSDNode>(RHS))
return SDValue();
+
+ // Extend shift amounts.
+ if (RHS.getValueSizeInBits() != VT.getScalarSizeInBits()) {
+ if (!IsShift)
+ return SDValue();
+ RHS = DAG.getZExtOrTrunc(RHS, DL, VT.getScalarType());
+ }
+
LHSElts.push_back(LHS);
RHSElts.push_back(RHS);
}
+ // Limit to shifts by uniform immediates.
+ // TODO: Only accept vXi8/vXi64 special cases?
+ // TODO: Permit non-uniform XOP/AVX2/MULLO cases?
+ if (IsShift && any_of(RHSElts, [&](SDValue V) { return RHSElts[0] != V; }))
+ return SDValue();
+
SDValue LHS = DAG.getBuildVector(VT, DL, LHSElts);
SDValue RHS = DAG.getBuildVector(VT, DL, RHSElts);
return DAG.getNode(Opcode, DL, VT, LHS, RHS);
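
The same fold fires for constant udiv expansions, as in the test_div7_2i64
case from the modified tests below: the multiply-high sequence ends with a
uniform per-lane right shift, which now becomes one vector psrlq after the
build_vector instead of a scalar shrq per element.

  define <2 x i64> @test_div7_2i64(<2 x i64> %a) {
    ; Each lane expands to a magic-constant mulq plus shift fixups; the
    ; final uniform shrq $2 is now folded into a single vector psrlq $2.
    %res = udiv <2 x i64> %a, <i64 7, i64 7>
    ret <2 x i64> %res
  }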
Modified: llvm/trunk/test/CodeGen/X86/rotate-extract-vector.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/rotate-extract-vector.ll?rev=361706&r1=361705&r2=361706&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/rotate-extract-vector.ll (original)
+++ llvm/trunk/test/CodeGen/X86/rotate-extract-vector.ll Sat May 25 11:02:17 2019
@@ -86,13 +86,12 @@ define <2 x i64> @vrolq_extract_udiv(<2
; X64-NEXT: vpextrq $1, %xmm0, %rax
; X64-NEXT: movabsq $-6148914691236517205, %rcx # imm = 0xAAAAAAAAAAAAAAAB
; X64-NEXT: mulq %rcx
-; X64-NEXT: shrq %rdx
; X64-NEXT: vmovq %rdx, %xmm1
; X64-NEXT: vmovq %xmm0, %rax
; X64-NEXT: mulq %rcx
-; X64-NEXT: shrq %rdx
; X64-NEXT: vmovq %rdx, %xmm0
; X64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X64-NEXT: vpsrlq $1, %xmm0, %xmm0
; X64-NEXT: vprolq $57, %zmm0, %zmm0
; X64-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; X64-NEXT: vzeroupper
@@ -256,24 +255,22 @@ define <2 x i64> @no_extract_udiv(<2 x i
; X64-NEXT: movabsq $-6148914691236517205, %rdi # imm = 0xAAAAAAAAAAAAAAAB
; X64-NEXT: movq %rcx, %rax
; X64-NEXT: mulq %rdi
-; X64-NEXT: shrq %rdx
; X64-NEXT: vmovq %rdx, %xmm1
; X64-NEXT: vmovq %xmm0, %rsi
; X64-NEXT: movq %rsi, %rax
; X64-NEXT: mulq %rdi
-; X64-NEXT: shrq %rdx
; X64-NEXT: vmovq %rdx, %xmm0
; X64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X64-NEXT: vpsrlq $1, %xmm0, %xmm0
; X64-NEXT: movabsq $-6180857105216966645, %rdi # imm = 0xAA392F35DC17F00B
; X64-NEXT: movq %rcx, %rax
; X64-NEXT: mulq %rdi
-; X64-NEXT: shrq $9, %rdx
; X64-NEXT: vmovq %rdx, %xmm1
; X64-NEXT: movq %rsi, %rax
; X64-NEXT: mulq %rdi
-; X64-NEXT: shrq $9, %rdx
; X64-NEXT: vmovq %rdx, %xmm2
; X64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; X64-NEXT: vpsrlq $9, %xmm1, %xmm1
; X64-NEXT: vpsllq $56, %xmm0, %xmm0
; X64-NEXT: vpor %xmm1, %xmm0, %xmm0
; X64-NEXT: retq
Modified: llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-128.ll?rev=361706&r1=361705&r2=361706&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-128.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-128.ll Sat May 25 11:02:17 2019
@@ -19,7 +19,6 @@ define <2 x i64> @test_div7_2i64(<2 x i6
; SSE2-NEXT: subq %rdx, %rcx
; SSE2-NEXT: shrq %rcx
; SSE2-NEXT: addq %rdx, %rcx
-; SSE2-NEXT: shrq $2, %rcx
; SSE2-NEXT: movq %rcx, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE2-NEXT: movq %xmm0, %rcx
@@ -28,9 +27,9 @@ define <2 x i64> @test_div7_2i64(<2 x i6
; SSE2-NEXT: subq %rdx, %rcx
; SSE2-NEXT: shrq %rcx
; SSE2-NEXT: addq %rdx, %rcx
-; SSE2-NEXT: shrq $2, %rcx
; SSE2-NEXT: movq %rcx, %xmm0
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE2-NEXT: psrlq $2, %xmm1
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: retq
;
@@ -43,7 +42,6 @@ define <2 x i64> @test_div7_2i64(<2 x i6
; SSE41-NEXT: subq %rdx, %rcx
; SSE41-NEXT: shrq %rcx
; SSE41-NEXT: addq %rdx, %rcx
-; SSE41-NEXT: shrq $2, %rcx
; SSE41-NEXT: movq %rcx, %xmm1
; SSE41-NEXT: movq %xmm0, %rcx
; SSE41-NEXT: movq %rcx, %rax
@@ -51,9 +49,9 @@ define <2 x i64> @test_div7_2i64(<2 x i6
; SSE41-NEXT: subq %rdx, %rcx
; SSE41-NEXT: shrq %rcx
; SSE41-NEXT: addq %rdx, %rcx
-; SSE41-NEXT: shrq $2, %rcx
; SSE41-NEXT: movq %rcx, %xmm0
; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE41-NEXT: psrlq $2, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: test_div7_2i64:
@@ -65,7 +63,6 @@ define <2 x i64> @test_div7_2i64(<2 x i6
; AVX-NEXT: subq %rdx, %rcx
; AVX-NEXT: shrq %rcx
; AVX-NEXT: addq %rdx, %rcx
-; AVX-NEXT: shrq $2, %rcx
; AVX-NEXT: vmovq %rcx, %xmm1
; AVX-NEXT: vmovq %xmm0, %rcx
; AVX-NEXT: movq %rcx, %rax
@@ -73,9 +70,9 @@ define <2 x i64> @test_div7_2i64(<2 x i6
; AVX-NEXT: subq %rdx, %rcx
; AVX-NEXT: shrq %rcx
; AVX-NEXT: addq %rdx, %rcx
-; AVX-NEXT: shrq $2, %rcx
; AVX-NEXT: vmovq %rcx, %xmm0
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT: vpsrlq $2, %xmm0, %xmm0
; AVX-NEXT: retq
%res = udiv <2 x i64> %a, <i64 7, i64 7>
ret <2 x i64> %res
Modified: llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-256.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-256.ll?rev=361706&r1=361705&r2=361706&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-256.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-256.ll Sat May 25 11:02:17 2019
@@ -10,32 +10,30 @@
define <4 x i64> @test_div7_4i64(<4 x i64> %a) nounwind {
; AVX1-LABEL: test_div7_4i64:
; AVX1: # %bb.0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpextrq $1, %xmm1, %rcx
+; AVX1-NEXT: vpextrq $1, %xmm0, %rcx
; AVX1-NEXT: movabsq $2635249153387078803, %rsi # imm = 0x2492492492492493
; AVX1-NEXT: movq %rcx, %rax
; AVX1-NEXT: mulq %rsi
; AVX1-NEXT: subq %rdx, %rcx
; AVX1-NEXT: shrq %rcx
; AVX1-NEXT: addq %rdx, %rcx
-; AVX1-NEXT: shrq $2, %rcx
-; AVX1-NEXT: vmovq %rcx, %xmm2
-; AVX1-NEXT: vmovq %xmm1, %rcx
+; AVX1-NEXT: vmovq %rcx, %xmm1
+; AVX1-NEXT: vmovq %xmm0, %rcx
; AVX1-NEXT: movq %rcx, %rax
; AVX1-NEXT: mulq %rsi
; AVX1-NEXT: subq %rdx, %rcx
; AVX1-NEXT: shrq %rcx
; AVX1-NEXT: addq %rdx, %rcx
-; AVX1-NEXT: shrq $2, %rcx
-; AVX1-NEXT: vmovq %rcx, %xmm1
-; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX1-NEXT: vmovq %rcx, %xmm2
+; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; AVX1-NEXT: vpsrlq $2, %xmm1, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpextrq $1, %xmm0, %rcx
; AVX1-NEXT: movq %rcx, %rax
; AVX1-NEXT: mulq %rsi
; AVX1-NEXT: subq %rdx, %rcx
; AVX1-NEXT: shrq %rcx
; AVX1-NEXT: addq %rdx, %rcx
-; AVX1-NEXT: shrq $2, %rcx
; AVX1-NEXT: vmovq %rcx, %xmm2
; AVX1-NEXT: vmovq %xmm0, %rcx
; AVX1-NEXT: movq %rcx, %rax
@@ -43,10 +41,10 @@ define <4 x i64> @test_div7_4i64(<4 x i6
; AVX1-NEXT: subq %rdx, %rcx
; AVX1-NEXT: shrq %rcx
; AVX1-NEXT: addq %rdx, %rcx
-; AVX1-NEXT: shrq $2, %rcx
; AVX1-NEXT: vmovq %rcx, %xmm0
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vpsrlq $2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_div7_4i64:
@@ -59,7 +57,6 @@ define <4 x i64> @test_div7_4i64(<4 x i6
; AVX2-NEXT: subq %rdx, %rcx
; AVX2-NEXT: shrq %rcx
; AVX2-NEXT: addq %rdx, %rcx
-; AVX2-NEXT: shrq $2, %rcx
; AVX2-NEXT: vmovq %rcx, %xmm2
; AVX2-NEXT: vmovq %xmm1, %rcx
; AVX2-NEXT: movq %rcx, %rax
@@ -67,7 +64,6 @@ define <4 x i64> @test_div7_4i64(<4 x i6
; AVX2-NEXT: subq %rdx, %rcx
; AVX2-NEXT: shrq %rcx
; AVX2-NEXT: addq %rdx, %rcx
-; AVX2-NEXT: shrq $2, %rcx
; AVX2-NEXT: vmovq %rcx, %xmm1
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX2-NEXT: vpextrq $1, %xmm0, %rcx
@@ -76,7 +72,6 @@ define <4 x i64> @test_div7_4i64(<4 x i6
; AVX2-NEXT: subq %rdx, %rcx
; AVX2-NEXT: shrq %rcx
; AVX2-NEXT: addq %rdx, %rcx
-; AVX2-NEXT: shrq $2, %rcx
; AVX2-NEXT: vmovq %rcx, %xmm2
; AVX2-NEXT: vmovq %xmm0, %rcx
; AVX2-NEXT: movq %rcx, %rax
@@ -84,10 +79,10 @@ define <4 x i64> @test_div7_4i64(<4 x i6
; AVX2-NEXT: subq %rdx, %rcx
; AVX2-NEXT: shrq %rcx
; AVX2-NEXT: addq %rdx, %rcx
-; AVX2-NEXT: shrq $2, %rcx
; AVX2-NEXT: vmovq %rcx, %xmm0
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vpsrlq $2, %ymm0, %ymm0
; AVX2-NEXT: retq
%res = udiv <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
ret <4 x i64> %res
Modified: llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-512.ll?rev=361706&r1=361705&r2=361706&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-512.ll Sat May 25 11:02:17 2019
@@ -17,7 +17,6 @@ define <8 x i64> @test_div7_8i64(<8 x i6
; AVX-NEXT: subq %rdx, %rcx
; AVX-NEXT: shrq %rcx
; AVX-NEXT: addq %rdx, %rcx
-; AVX-NEXT: shrq $2, %rcx
; AVX-NEXT: vmovq %rcx, %xmm2
; AVX-NEXT: vmovq %xmm1, %rcx
; AVX-NEXT: movq %rcx, %rax
@@ -25,7 +24,6 @@ define <8 x i64> @test_div7_8i64(<8 x i6
; AVX-NEXT: subq %rdx, %rcx
; AVX-NEXT: shrq %rcx
; AVX-NEXT: addq %rdx, %rcx
-; AVX-NEXT: shrq $2, %rcx
; AVX-NEXT: vmovq %rcx, %xmm1
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX-NEXT: vextracti32x4 $2, %zmm0, %xmm2
@@ -35,7 +33,6 @@ define <8 x i64> @test_div7_8i64(<8 x i6
; AVX-NEXT: subq %rdx, %rcx
; AVX-NEXT: shrq %rcx
; AVX-NEXT: addq %rdx, %rcx
-; AVX-NEXT: shrq $2, %rcx
; AVX-NEXT: vmovq %rcx, %xmm3
; AVX-NEXT: vmovq %xmm2, %rcx
; AVX-NEXT: movq %rcx, %rax
@@ -43,7 +40,6 @@ define <8 x i64> @test_div7_8i64(<8 x i6
; AVX-NEXT: subq %rdx, %rcx
; AVX-NEXT: shrq %rcx
; AVX-NEXT: addq %rdx, %rcx
-; AVX-NEXT: shrq $2, %rcx
; AVX-NEXT: vmovq %rcx, %xmm2
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; AVX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
@@ -54,7 +50,6 @@ define <8 x i64> @test_div7_8i64(<8 x i6
; AVX-NEXT: subq %rdx, %rcx
; AVX-NEXT: shrq %rcx
; AVX-NEXT: addq %rdx, %rcx
-; AVX-NEXT: shrq $2, %rcx
; AVX-NEXT: vmovq %rcx, %xmm3
; AVX-NEXT: vmovq %xmm2, %rcx
; AVX-NEXT: movq %rcx, %rax
@@ -62,7 +57,6 @@ define <8 x i64> @test_div7_8i64(<8 x i6
; AVX-NEXT: subq %rdx, %rcx
; AVX-NEXT: shrq %rcx
; AVX-NEXT: addq %rdx, %rcx
-; AVX-NEXT: shrq $2, %rcx
; AVX-NEXT: vmovq %rcx, %xmm2
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; AVX-NEXT: vpextrq $1, %xmm0, %rcx
@@ -71,7 +65,6 @@ define <8 x i64> @test_div7_8i64(<8 x i6
; AVX-NEXT: subq %rdx, %rcx
; AVX-NEXT: shrq %rcx
; AVX-NEXT: addq %rdx, %rcx
-; AVX-NEXT: shrq $2, %rcx
; AVX-NEXT: vmovq %rcx, %xmm3
; AVX-NEXT: vmovq %xmm0, %rcx
; AVX-NEXT: movq %rcx, %rax
@@ -79,11 +72,11 @@ define <8 x i64> @test_div7_8i64(<8 x i6
; AVX-NEXT: subq %rdx, %rcx
; AVX-NEXT: shrq %rcx
; AVX-NEXT: addq %rdx, %rcx
-; AVX-NEXT: shrq $2, %rcx
; AVX-NEXT: vmovq %rcx, %xmm0
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
; AVX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX-NEXT: vpsrlq $2, %zmm0, %zmm0
; AVX-NEXT: retq
%res = udiv <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
ret <8 x i64> %res
Modified: llvm/trunk/test/CodeGen/X86/vector-sext-widen.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-sext-widen.ll?rev=361706&r1=361705&r2=361706&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-sext-widen.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-sext-widen.ll Sat May 25 11:02:17 2019
@@ -1354,12 +1354,12 @@ define <2 x i64> @load_sext_2i1_to_2i64(
; SSE-NEXT: movzbl (%rdi), %eax
; SSE-NEXT: movq %rax, %rcx
; SSE-NEXT: shlq $62, %rcx
-; SSE-NEXT: sarq $63, %rcx
-; SSE-NEXT: movq %rcx, %xmm1
+; SSE-NEXT: movq %rcx, %xmm0
; SSE-NEXT: shlq $63, %rax
-; SSE-NEXT: sarq $63, %rax
-; SSE-NEXT: movq %rax, %xmm0
-; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT: movq %rax, %xmm1
+; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE-NEXT: psrad $31, %xmm1
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE-NEXT: retq
;
; AVX1-LABEL: load_sext_2i1_to_2i64:
@@ -1367,12 +1367,12 @@ define <2 x i64> @load_sext_2i1_to_2i64(
; AVX1-NEXT: movzbl (%rdi), %eax
; AVX1-NEXT: movq %rax, %rcx
; AVX1-NEXT: shlq $62, %rcx
-; AVX1-NEXT: sarq $63, %rcx
; AVX1-NEXT: vmovq %rcx, %xmm0
; AVX1-NEXT: shlq $63, %rax
-; AVX1-NEXT: sarq $63, %rax
; AVX1-NEXT: vmovq %rax, %xmm1
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: load_sext_2i1_to_2i64:
@@ -1380,12 +1380,12 @@ define <2 x i64> @load_sext_2i1_to_2i64(
; AVX2-NEXT: movzbl (%rdi), %eax
; AVX2-NEXT: movq %rax, %rcx
; AVX2-NEXT: shlq $62, %rcx
-; AVX2-NEXT: sarq $63, %rcx
; AVX2-NEXT: vmovq %rcx, %xmm0
; AVX2-NEXT: shlq $63, %rax
-; AVX2-NEXT: sarq $63, %rax
; AVX2-NEXT: vmovq %rax, %xmm1
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: load_sext_2i1_to_2i64:
@@ -1402,14 +1402,13 @@ define <2 x i64> @load_sext_2i1_to_2i64(
; X32-SSE2-NEXT: movzbl (%eax), %eax
; X32-SSE2-NEXT: movl %eax, %ecx
; X32-SSE2-NEXT: shll $30, %ecx
-; X32-SSE2-NEXT: sarl $31, %ecx
; X32-SSE2-NEXT: movd %ecx, %xmm0
; X32-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,1,1]
; X32-SSE2-NEXT: shll $31, %eax
-; X32-SSE2-NEXT: sarl $31, %eax
; X32-SSE2-NEXT: movd %eax, %xmm0
; X32-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; X32-SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-SSE2-NEXT: psrad $31, %xmm0
; X32-SSE2-NEXT: retl
;
; X32-SSE41-LABEL: load_sext_2i1_to_2i64:
@@ -1418,13 +1417,12 @@ define <2 x i64> @load_sext_2i1_to_2i64(
; X32-SSE41-NEXT: movzbl (%eax), %eax
; X32-SSE41-NEXT: movl %eax, %ecx
; X32-SSE41-NEXT: shll $31, %ecx
-; X32-SSE41-NEXT: sarl $31, %ecx
; X32-SSE41-NEXT: movd %ecx, %xmm0
; X32-SSE41-NEXT: pinsrd $1, %ecx, %xmm0
; X32-SSE41-NEXT: shll $30, %eax
-; X32-SSE41-NEXT: sarl $31, %eax
; X32-SSE41-NEXT: pinsrd $2, %eax, %xmm0
; X32-SSE41-NEXT: pinsrd $3, %eax, %xmm0
+; X32-SSE41-NEXT: psrad $31, %xmm0
; X32-SSE41-NEXT: retl
entry:
%X = load <2 x i1>, <2 x i1>* %ptr
@@ -1612,22 +1610,19 @@ define <4 x i32> @load_sext_4i1_to_4i32(
; X32-SSE2-NEXT: movl (%eax), %eax
; X32-SSE2-NEXT: movl %eax, %ecx
; X32-SSE2-NEXT: shll $28, %ecx
-; X32-SSE2-NEXT: sarl $31, %ecx
; X32-SSE2-NEXT: movd %ecx, %xmm0
; X32-SSE2-NEXT: movl %eax, %ecx
; X32-SSE2-NEXT: shll $29, %ecx
-; X32-SSE2-NEXT: sarl $31, %ecx
; X32-SSE2-NEXT: movd %ecx, %xmm1
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X32-SSE2-NEXT: movl %eax, %ecx
; X32-SSE2-NEXT: shll $30, %ecx
-; X32-SSE2-NEXT: sarl $31, %ecx
; X32-SSE2-NEXT: movd %ecx, %xmm2
; X32-SSE2-NEXT: shll $31, %eax
-; X32-SSE2-NEXT: sarl $31, %eax
; X32-SSE2-NEXT: movd %eax, %xmm0
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X32-SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-SSE2-NEXT: psrad $31, %xmm0
; X32-SSE2-NEXT: retl
;
; X32-SSE41-LABEL: load_sext_4i1_to_4i32:
@@ -1636,19 +1631,16 @@ define <4 x i32> @load_sext_4i1_to_4i32(
; X32-SSE41-NEXT: movl (%eax), %eax
; X32-SSE41-NEXT: movl %eax, %ecx
; X32-SSE41-NEXT: shll $30, %ecx
-; X32-SSE41-NEXT: sarl $31, %ecx
; X32-SSE41-NEXT: movl %eax, %edx
; X32-SSE41-NEXT: shll $31, %edx
-; X32-SSE41-NEXT: sarl $31, %edx
; X32-SSE41-NEXT: movd %edx, %xmm0
; X32-SSE41-NEXT: pinsrd $1, %ecx, %xmm0
; X32-SSE41-NEXT: movl %eax, %ecx
; X32-SSE41-NEXT: shll $29, %ecx
-; X32-SSE41-NEXT: sarl $31, %ecx
; X32-SSE41-NEXT: pinsrd $2, %ecx, %xmm0
; X32-SSE41-NEXT: shll $28, %eax
-; X32-SSE41-NEXT: sarl $31, %eax
; X32-SSE41-NEXT: pinsrd $3, %eax, %xmm0
+; X32-SSE41-NEXT: psrad $31, %xmm0
; X32-SSE41-NEXT: retl
entry:
%X = load <4 x i1>, <4 x i1>* %ptr
@@ -1808,22 +1800,20 @@ define <4 x i64> @load_sext_4i1_to_4i64(
; AVX2-NEXT: movl (%rdi), %eax
; AVX2-NEXT: movq %rax, %rcx
; AVX2-NEXT: shlq $60, %rcx
-; AVX2-NEXT: sarq $63, %rcx
; AVX2-NEXT: vmovq %rcx, %xmm0
; AVX2-NEXT: movq %rax, %rcx
; AVX2-NEXT: shlq $61, %rcx
-; AVX2-NEXT: sarq $63, %rcx
; AVX2-NEXT: vmovq %rcx, %xmm1
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX2-NEXT: movq %rax, %rcx
; AVX2-NEXT: shlq $62, %rcx
-; AVX2-NEXT: sarq $63, %rcx
; AVX2-NEXT: vmovq %rcx, %xmm1
; AVX2-NEXT: shlq $63, %rax
-; AVX2-NEXT: sarq $63, %rax
; AVX2-NEXT: vmovq %rax, %xmm2
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: load_sext_4i1_to_4i64:
@@ -5990,22 +5980,19 @@ define <4 x i32> @sext_4i17_to_4i32(<4 x
; X32-SSE2-NEXT: movl 8(%eax), %eax
; X32-SSE2-NEXT: shldl $13, %edx, %eax
; X32-SSE2-NEXT: shll $15, %eax
-; X32-SSE2-NEXT: sarl $15, %eax
; X32-SSE2-NEXT: movd %eax, %xmm0
; X32-SSE2-NEXT: movl %edx, %eax
; X32-SSE2-NEXT: shll $13, %eax
-; X32-SSE2-NEXT: sarl $15, %eax
; X32-SSE2-NEXT: movd %eax, %xmm1
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X32-SSE2-NEXT: shldl $15, %ecx, %edx
; X32-SSE2-NEXT: shll $15, %ecx
-; X32-SSE2-NEXT: sarl $15, %ecx
; X32-SSE2-NEXT: movd %ecx, %xmm0
; X32-SSE2-NEXT: shll $15, %edx
-; X32-SSE2-NEXT: sarl $15, %edx
; X32-SSE2-NEXT: movd %edx, %xmm2
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X32-SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-SSE2-NEXT: psrad $15, %xmm0
; X32-SSE2-NEXT: retl
;
; X32-SSE41-LABEL: sext_4i17_to_4i32:
@@ -6021,17 +6008,14 @@ define <4 x i32> @sext_4i17_to_4i32(<4 x
; X32-SSE41-NEXT: shldl $13, %edx, %eax
; X32-SSE41-NEXT: shldl $15, %ecx, %edx
; X32-SSE41-NEXT: shll $15, %edx
-; X32-SSE41-NEXT: sarl $15, %edx
; X32-SSE41-NEXT: shll $15, %ecx
-; X32-SSE41-NEXT: sarl $15, %ecx
; X32-SSE41-NEXT: movd %ecx, %xmm0
; X32-SSE41-NEXT: pinsrd $1, %edx, %xmm0
; X32-SSE41-NEXT: shll $13, %esi
-; X32-SSE41-NEXT: sarl $15, %esi
; X32-SSE41-NEXT: pinsrd $2, %esi, %xmm0
; X32-SSE41-NEXT: shll $15, %eax
-; X32-SSE41-NEXT: sarl $15, %eax
; X32-SSE41-NEXT: pinsrd $3, %eax, %xmm0
+; X32-SSE41-NEXT: psrad $15, %xmm0
; X32-SSE41-NEXT: popl %esi
; X32-SSE41-NEXT: .cfi_def_cfa_offset 4
; X32-SSE41-NEXT: retl
Modified: llvm/trunk/test/CodeGen/X86/vector-sext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-sext.ll?rev=361706&r1=361705&r2=361706&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-sext.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-sext.ll Sat May 25 11:02:17 2019
@@ -1354,12 +1354,12 @@ define <2 x i64> @load_sext_2i1_to_2i64(
; SSE-NEXT: movzbl (%rdi), %eax
; SSE-NEXT: movq %rax, %rcx
; SSE-NEXT: shlq $62, %rcx
-; SSE-NEXT: sarq $63, %rcx
-; SSE-NEXT: movq %rcx, %xmm1
+; SSE-NEXT: movq %rcx, %xmm0
; SSE-NEXT: shlq $63, %rax
-; SSE-NEXT: sarq $63, %rax
-; SSE-NEXT: movq %rax, %xmm0
-; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT: movq %rax, %xmm1
+; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE-NEXT: psrad $31, %xmm1
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE-NEXT: retq
;
; AVX1-LABEL: load_sext_2i1_to_2i64:
@@ -1367,12 +1367,12 @@ define <2 x i64> @load_sext_2i1_to_2i64(
; AVX1-NEXT: movzbl (%rdi), %eax
; AVX1-NEXT: movq %rax, %rcx
; AVX1-NEXT: shlq $62, %rcx
-; AVX1-NEXT: sarq $63, %rcx
; AVX1-NEXT: vmovq %rcx, %xmm0
; AVX1-NEXT: shlq $63, %rax
-; AVX1-NEXT: sarq $63, %rax
; AVX1-NEXT: vmovq %rax, %xmm1
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: load_sext_2i1_to_2i64:
@@ -1380,12 +1380,12 @@ define <2 x i64> @load_sext_2i1_to_2i64(
; AVX2-NEXT: movzbl (%rdi), %eax
; AVX2-NEXT: movq %rax, %rcx
; AVX2-NEXT: shlq $62, %rcx
-; AVX2-NEXT: sarq $63, %rcx
; AVX2-NEXT: vmovq %rcx, %xmm0
; AVX2-NEXT: shlq $63, %rax
-; AVX2-NEXT: sarq $63, %rax
; AVX2-NEXT: vmovq %rax, %xmm1
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: load_sext_2i1_to_2i64:
@@ -1402,14 +1402,13 @@ define <2 x i64> @load_sext_2i1_to_2i64(
; X32-SSE2-NEXT: movzbl (%eax), %eax
; X32-SSE2-NEXT: movl %eax, %ecx
; X32-SSE2-NEXT: shll $30, %ecx
-; X32-SSE2-NEXT: sarl $31, %ecx
; X32-SSE2-NEXT: movd %ecx, %xmm0
; X32-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,1,1]
; X32-SSE2-NEXT: shll $31, %eax
-; X32-SSE2-NEXT: sarl $31, %eax
; X32-SSE2-NEXT: movd %eax, %xmm0
; X32-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; X32-SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-SSE2-NEXT: psrad $31, %xmm0
; X32-SSE2-NEXT: retl
;
; X32-SSE41-LABEL: load_sext_2i1_to_2i64:
@@ -1418,13 +1417,12 @@ define <2 x i64> @load_sext_2i1_to_2i64(
; X32-SSE41-NEXT: movzbl (%eax), %eax
; X32-SSE41-NEXT: movl %eax, %ecx
; X32-SSE41-NEXT: shll $31, %ecx
-; X32-SSE41-NEXT: sarl $31, %ecx
; X32-SSE41-NEXT: movd %ecx, %xmm0
; X32-SSE41-NEXT: pinsrd $1, %ecx, %xmm0
; X32-SSE41-NEXT: shll $30, %eax
-; X32-SSE41-NEXT: sarl $31, %eax
; X32-SSE41-NEXT: pinsrd $2, %eax, %xmm0
; X32-SSE41-NEXT: pinsrd $3, %eax, %xmm0
+; X32-SSE41-NEXT: psrad $31, %xmm0
; X32-SSE41-NEXT: retl
entry:
%X = load <2 x i1>, <2 x i1>* %ptr
@@ -1612,22 +1610,19 @@ define <4 x i32> @load_sext_4i1_to_4i32(
; X32-SSE2-NEXT: movl (%eax), %eax
; X32-SSE2-NEXT: movl %eax, %ecx
; X32-SSE2-NEXT: shll $28, %ecx
-; X32-SSE2-NEXT: sarl $31, %ecx
; X32-SSE2-NEXT: movd %ecx, %xmm0
; X32-SSE2-NEXT: movl %eax, %ecx
; X32-SSE2-NEXT: shll $29, %ecx
-; X32-SSE2-NEXT: sarl $31, %ecx
; X32-SSE2-NEXT: movd %ecx, %xmm1
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X32-SSE2-NEXT: movl %eax, %ecx
; X32-SSE2-NEXT: shll $30, %ecx
-; X32-SSE2-NEXT: sarl $31, %ecx
; X32-SSE2-NEXT: movd %ecx, %xmm2
; X32-SSE2-NEXT: shll $31, %eax
-; X32-SSE2-NEXT: sarl $31, %eax
; X32-SSE2-NEXT: movd %eax, %xmm0
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X32-SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-SSE2-NEXT: psrad $31, %xmm0
; X32-SSE2-NEXT: retl
;
; X32-SSE41-LABEL: load_sext_4i1_to_4i32:
@@ -1636,19 +1631,16 @@ define <4 x i32> @load_sext_4i1_to_4i32(
; X32-SSE41-NEXT: movl (%eax), %eax
; X32-SSE41-NEXT: movl %eax, %ecx
; X32-SSE41-NEXT: shll $30, %ecx
-; X32-SSE41-NEXT: sarl $31, %ecx
; X32-SSE41-NEXT: movl %eax, %edx
; X32-SSE41-NEXT: shll $31, %edx
-; X32-SSE41-NEXT: sarl $31, %edx
; X32-SSE41-NEXT: movd %edx, %xmm0
; X32-SSE41-NEXT: pinsrd $1, %ecx, %xmm0
; X32-SSE41-NEXT: movl %eax, %ecx
; X32-SSE41-NEXT: shll $29, %ecx
-; X32-SSE41-NEXT: sarl $31, %ecx
; X32-SSE41-NEXT: pinsrd $2, %ecx, %xmm0
; X32-SSE41-NEXT: shll $28, %eax
-; X32-SSE41-NEXT: sarl $31, %eax
; X32-SSE41-NEXT: pinsrd $3, %eax, %xmm0
+; X32-SSE41-NEXT: psrad $31, %xmm0
; X32-SSE41-NEXT: retl
entry:
%X = load <4 x i1>, <4 x i1>* %ptr
@@ -1808,22 +1800,20 @@ define <4 x i64> @load_sext_4i1_to_4i64(
; AVX2-NEXT: movl (%rdi), %eax
; AVX2-NEXT: movq %rax, %rcx
; AVX2-NEXT: shlq $60, %rcx
-; AVX2-NEXT: sarq $63, %rcx
; AVX2-NEXT: vmovq %rcx, %xmm0
; AVX2-NEXT: movq %rax, %rcx
; AVX2-NEXT: shlq $61, %rcx
-; AVX2-NEXT: sarq $63, %rcx
; AVX2-NEXT: vmovq %rcx, %xmm1
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX2-NEXT: movq %rax, %rcx
; AVX2-NEXT: shlq $62, %rcx
-; AVX2-NEXT: sarq $63, %rcx
; AVX2-NEXT: vmovq %rcx, %xmm1
; AVX2-NEXT: shlq $63, %rax
-; AVX2-NEXT: sarq $63, %rax
; AVX2-NEXT: vmovq %rax, %xmm2
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: load_sext_4i1_to_4i64:
@@ -6008,22 +5998,19 @@ define <4 x i32> @sext_4i17_to_4i32(<4 x
; X32-SSE2-NEXT: movl 8(%eax), %eax
; X32-SSE2-NEXT: shldl $13, %edx, %eax
; X32-SSE2-NEXT: shll $15, %eax
-; X32-SSE2-NEXT: sarl $15, %eax
; X32-SSE2-NEXT: movd %eax, %xmm0
; X32-SSE2-NEXT: movl %edx, %eax
; X32-SSE2-NEXT: shll $13, %eax
-; X32-SSE2-NEXT: sarl $15, %eax
; X32-SSE2-NEXT: movd %eax, %xmm1
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X32-SSE2-NEXT: shldl $15, %ecx, %edx
; X32-SSE2-NEXT: shll $15, %ecx
-; X32-SSE2-NEXT: sarl $15, %ecx
; X32-SSE2-NEXT: movd %ecx, %xmm0
; X32-SSE2-NEXT: shll $15, %edx
-; X32-SSE2-NEXT: sarl $15, %edx
; X32-SSE2-NEXT: movd %edx, %xmm2
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X32-SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-SSE2-NEXT: psrad $15, %xmm0
; X32-SSE2-NEXT: retl
;
; X32-SSE41-LABEL: sext_4i17_to_4i32:
@@ -6039,17 +6026,14 @@ define <4 x i32> @sext_4i17_to_4i32(<4 x
; X32-SSE41-NEXT: shldl $13, %edx, %eax
; X32-SSE41-NEXT: shldl $15, %ecx, %edx
; X32-SSE41-NEXT: shll $15, %edx
-; X32-SSE41-NEXT: sarl $15, %edx
; X32-SSE41-NEXT: shll $15, %ecx
-; X32-SSE41-NEXT: sarl $15, %ecx
; X32-SSE41-NEXT: movd %ecx, %xmm0
; X32-SSE41-NEXT: pinsrd $1, %edx, %xmm0
; X32-SSE41-NEXT: shll $13, %esi
-; X32-SSE41-NEXT: sarl $15, %esi
; X32-SSE41-NEXT: pinsrd $2, %esi, %xmm0
; X32-SSE41-NEXT: shll $15, %eax
-; X32-SSE41-NEXT: sarl $15, %eax
; X32-SSE41-NEXT: pinsrd $3, %eax, %xmm0
+; X32-SSE41-NEXT: psrad $15, %xmm0
; X32-SSE41-NEXT: popl %esi
; X32-SSE41-NEXT: .cfi_def_cfa_offset 4
; X32-SSE41-NEXT: retl