[llvm] [X86] Convert lshr(x, BW-1) -> and(x, 1) iff x is allsignbits (PR #83596)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 1 11:21:01 PST 2024
https://github.com/RKSimon updated https://github.com/llvm/llvm-project/pull/83596
From 7628b3c0248198b659ea753eb0334bd83d768807 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Fri, 1 Mar 2024 17:08:19 +0000
Subject: [PATCH] [X86] Convert logicalshift(x, C) -> and(x, M) iff x is
allsignbits
If we're shifting an all-signbits value, then we can just mask out the shifted bits.
This helps remove some unnecessary bitcast vXi16 shifts used for vXi8 shifts (which SimplifyDemandedBits struggles to remove through the bitcast), and allows some AVX1 shifts of 256-bit values to stay as a single YMM instruction.
Noticed in codegen from #82290
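To illustrate the fold, here is a minimal IR sketch (a hypothetical example, not one of the updated tests; the value names are invented): a compare result sign-extended to <4 x i32> has 32 sign bits per element, so every lane is 0 or -1, and a logical shift can only clear bits:

  %c = icmp eq <4 x i32> %x, %y
  %s = sext <4 x i1> %c to <4 x i32>   ; every lane is all-0s or all-1s
  %srl = lshr <4 x i32> %s, <i32 31, i32 31, i32 31, i32 31>
  ; ... is equivalent to ...
  %and = and <4 x i32> %s, <i32 1, i32 1, i32 1, i32 1>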
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 12 +-
.../X86/bitcast-int-to-vector-bool-zext.ll | 103 +++++++-----------
.../CodeGen/X86/bitcast-int-to-vector-bool.ll | 11 +-
llvm/test/CodeGen/X86/cmp-shiftX-maskX.ll | 18 +--
4 files changed, 52 insertions(+), 92 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 0162bb65afe3b0..866a2a94a0bfe9 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -28932,6 +28932,7 @@ static SDValue LowerShiftByScalarImmediate(SDValue Op, SelectionDAG &DAG,
SDValue R = Op.getOperand(0);
SDValue Amt = Op.getOperand(1);
unsigned X86Opc = getTargetVShiftUniformOpcode(Op.getOpcode(), false);
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
auto ArithmeticShiftRight64 = [&](uint64_t ShiftAmt) {
assert((VT == MVT::v2i64 || VT == MVT::v4i64) && "Unexpected SRA type");
@@ -28978,7 +28979,7 @@ static SDValue LowerShiftByScalarImmediate(SDValue Op, SelectionDAG &DAG,
return SDValue();
// If the shift amount is out of range, return undef.
- if (APIntShiftAmt.uge(VT.getScalarSizeInBits()))
+ if (APIntShiftAmt.uge(EltSizeInBits))
return DAG.getUNDEF(VT);
uint64_t ShiftAmt = APIntShiftAmt.getZExtValue();
@@ -29006,6 +29007,15 @@ static SDValue LowerShiftByScalarImmediate(SDValue Op, SelectionDAG &DAG,
Op.getOpcode() == ISD::SRA)
return ArithmeticShiftRight64(ShiftAmt);
+ // If we're logical shifting an all-signbits value then we can just perform
+ // this as a mask.
+ if ((Op.getOpcode() == ISD::SHL || Op.getOpcode() == ISD::SRL) &&
+ DAG.ComputeNumSignBits(R) == EltSizeInBits) {
+ SDValue Mask = DAG.getAllOnesConstant(dl, VT);
+ Mask = DAG.getNode(Op.getOpcode(), dl, VT, Mask, Amt);
+ return DAG.getNode(ISD::AND, dl, VT, R, Mask);
+ }
+
if (VT == MVT::v16i8 || (Subtarget.hasInt256() && VT == MVT::v32i8) ||
(Subtarget.hasBWI() && VT == MVT::v64i8)) {
unsigned NumElts = VT.getVectorNumElements();
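A note on the hunk above (my reading of the patch, not part of it): the mask is built by applying the same shift, with the same amount, to an all-ones constant, which is correct for both SHL and SRL because shifting an all-0s/all-1s element never sets a bit outside that mask. For example, for the left-shift-by-3 case exercised by shl_s3_cmp_v16i8 below (sketch with invented names, narrowed to 4 lanes for brevity):

  ; assuming every lane of %m is 0x00 or 0xFF
  %shl = shl <4 x i8> %m, <i8 3, i8 3, i8 3, i8 3>
  ; ... is equivalent to ...
  %and = and <4 x i8> %m, <i8 -8, i8 -8, i8 -8, i8 -8>   ; 0xFF << 3 == 0xF8 == -8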
diff --git a/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll b/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll
index ee39b1333fff31..d2794df731b65d 100644
--- a/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll
+++ b/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll
@@ -180,7 +180,6 @@ define <16 x i8> @ext_i16_16i8(i16 %a0) {
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: pcmpeqb %xmm1, %xmm0
-; SSE2-NEXT: psrlw $7, %xmm0
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT: retq
;
@@ -191,7 +190,6 @@ define <16 x i8> @ext_i16_16i8(i16 %a0) {
; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSSE3-NEXT: pand %xmm1, %xmm0
; SSSE3-NEXT: pcmpeqb %xmm1, %xmm0
-; SSSE3-NEXT: psrlw $7, %xmm0
; SSSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSSE3-NEXT: retq
;
@@ -203,7 +201,6 @@ define <16 x i8> @ext_i16_16i8(i16 %a0) {
; AVX1-NEXT: # xmm1 = mem[0,0]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsrlw $7, %xmm0, %xmm0
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: retq
;
@@ -214,7 +211,6 @@ define <16 x i8> @ext_i16_16i8(i16 %a0) {
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpsrlw $7, %xmm0, %xmm0
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: retq
;
@@ -268,11 +264,10 @@ define <4 x i64> @ext_i4_4i64(i4 %a0) {
; AVX1-NEXT: vmovaps {{.*#+}} ymm1 = [1,2,4,8]
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm1
-; AVX1-NEXT: vpsrlq $63, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX1-NEXT: vpsrlq $63, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i4_4i64:
@@ -328,11 +323,10 @@ define <8 x i32> @ext_i8_8i32(i8 %a0) {
; AVX1-NEXT: vmovaps {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128]
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1
-; AVX1-NEXT: vpsrld $31, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i8_8i32:
@@ -390,11 +384,10 @@ define <16 x i16> @ext_i16_16i16(i16 %a0) {
; AVX1-NEXT: vmovaps {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1
-; AVX1-NEXT: vpsrlw $15, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX1-NEXT: vpsrlw $15, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i16_16i16:
@@ -436,14 +429,12 @@ define <32 x i8> @ext_i32_32i8(i32 %a0) {
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqb %xmm2, %xmm0
-; SSE2-SSSE3-NEXT: psrlw $7, %xmm0
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; SSE2-SSSE3-NEXT: pand %xmm3, %xmm0
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[2,2,3,3,4,5,6,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqb %xmm2, %xmm1
-; SSE2-SSSE3-NEXT: psrlw $7, %xmm1
; SSE2-SSSE3-NEXT: pand %xmm3, %xmm1
; SSE2-SSSE3-NEXT: retq
;
@@ -460,13 +451,9 @@ define <32 x i8> @ext_i32_32i8(i32 %a0) {
; AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX1-NEXT: # xmm2 = mem[0,0]
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vpsrlw $7, %xmm1, %xmm1
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
-; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpsrlw $7, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i32_32i8:
@@ -477,7 +464,6 @@ define <32 x i8> @ext_i32_32i8(i32 %a0) {
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpsrlw $7, %ymm0, %ymm0
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: retq
;
@@ -550,19 +536,18 @@ define <8 x i64> @ext_i8_8i64(i8 %a0) {
; AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [1,2,4,8]
; AVX1-NEXT: vandps %ymm0, %ymm1, %ymm2
; AVX1-NEXT: vpcmpeqq %xmm0, %xmm2, %xmm0
-; AVX1-NEXT: vpsrlq $63, %xmm0, %xmm0
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
-; AVX1-NEXT: vpsrlq $63, %xmm2, %xmm2
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [16,32,64,128]
-; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
-; AVX1-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm2
-; AVX1-NEXT: vpsrlq $63, %xmm2, %xmm2
+; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = [1,1,1,1]
+; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
+; AVX1-NEXT: vmovaps {{.*#+}} ymm3 = [16,32,64,128]
+; AVX1-NEXT: vandps %ymm3, %ymm1, %ymm1
+; AVX1-NEXT: vpcmpeqq %xmm3, %xmm1, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vpsrlq $63, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
+; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i8_8i64:
@@ -631,19 +616,18 @@ define <16 x i32> @ext_i16_16i32(i16 %a0) {
; AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [1,2,4,8,16,32,64,128]
; AVX1-NEXT: vandps %ymm0, %ymm1, %ymm2
; AVX1-NEXT: vpcmpeqd %xmm0, %xmm2, %xmm0
-; AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
-; AVX1-NEXT: vpsrld $31, %xmm2, %xmm2
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [256,512,1024,2048,4096,8192,16384,32768]
-; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
-; AVX1-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm2
-; AVX1-NEXT: vpsrld $31, %xmm2, %xmm2
+; AVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1]
+; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
+; AVX1-NEXT: vmovaps {{.*#+}} ymm3 = [256,512,1024,2048,4096,8192,16384,32768]
+; AVX1-NEXT: vandps %ymm3, %ymm1, %ymm1
+; AVX1-NEXT: vpcmpeqd %xmm3, %xmm1, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vpsrld $31, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
+; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i16_16i32:
@@ -712,23 +696,22 @@ define <32 x i16> @ext_i32_32i16(i32 %a0) {
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
-; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm3
-; AVX1-NEXT: vpsrlw $15, %xmm3, %xmm3
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [256,512,1024,2048,4096,8192,16384,32768]
-; AVX1-NEXT: vpcmpeqw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpsrlw $15, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
+; AVX1-NEXT: vpcmpeqw %xmm4, %xmm3, %xmm3
+; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; AVX1-NEXT: vbroadcastss {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; AVX1-NEXT: vandps %ymm3, %ymm0, %ymm0
; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[1,1,1,1,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
-; AVX1-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm2
-; AVX1-NEXT: vpsrlw $15, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
-; AVX1-NEXT: vpcmpeqw %xmm4, %xmm1, %xmm1
-; AVX1-NEXT: vpsrlw $15, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5
+; AVX1-NEXT: vpcmpeqw %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm1, %ymm1
+; AVX1-NEXT: vandps %ymm3, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i32_32i16:
@@ -782,26 +765,22 @@ define <64 x i8> @ext_i64_64i8(i64 %a0) {
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm0
-; SSE2-SSSE3-NEXT: psrlw $7, %xmm0
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; SSE2-SSSE3-NEXT: pand %xmm5, %xmm0
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm3[2,2,3,3,4,5,6,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm1
-; SSE2-SSSE3-NEXT: psrlw $7, %xmm1
; SSE2-SSSE3-NEXT: pand %xmm5, %xmm1
; SSE2-SSSE3-NEXT: pshufhw {{.*#+}} xmm2 = xmm3[0,1,2,3,4,4,5,5]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,2,3,3]
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm2
-; SSE2-SSSE3-NEXT: psrlw $7, %xmm2
; SSE2-SSSE3-NEXT: pand %xmm5, %xmm2
; SSE2-SSSE3-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,6,6,7,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm3
-; SSE2-SSSE3-NEXT: psrlw $7, %xmm3
; SSE2-SSSE3-NEXT: pand %xmm5, %xmm3
; SSE2-SSSE3-NEXT: retq
;
@@ -817,26 +796,20 @@ define <64 x i8> @ext_i64_64i8(i64 %a0) {
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm3, %xmm3
-; AVX1-NEXT: vpsrlw $7, %xmm3, %xmm3
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm4 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
-; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpsrlw $7, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
-; AVX1-NEXT: vpshufhw {{.*#+}} xmm3 = xmm1[0,1,2,3,4,4,5,5]
+; AVX1-NEXT: vbroadcastss {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; AVX1-NEXT: vandps %ymm3, %ymm0, %ymm0
+; AVX1-NEXT: vpshufhw {{.*#+}} xmm4 = xmm1[0,1,2,3,4,4,5,5]
; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,6,7,7]
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm4, %ymm1
; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm1[2,2,3,3,6,6,7,7]
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
-; AVX1-NEXT: vpcmpeqb %xmm2, %xmm3, %xmm3
-; AVX1-NEXT: vpsrlw $7, %xmm3, %xmm3
-; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm3
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpcmpeqb %xmm2, %xmm4, %xmm4
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vpsrlw $7, %xmm1, %xmm1
-; AVX1-NEXT: vpand %xmm4, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm1, %ymm1
+; AVX1-NEXT: vandps %ymm3, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i64_64i8:
@@ -847,13 +820,11 @@ define <64 x i8> @ext_i64_64i8(i64 %a0) {
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: vpsrlw $7, %ymm0, %ymm0
; AVX2-NEXT: vpbroadcastb {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX2-NEXT: vpand %ymm3, %ymm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,22,22,22,22,22,22,22,22,23,23,23,23,23,23,23,23]
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm1
-; AVX2-NEXT: vpsrlw $7, %ymm1, %ymm1
; AVX2-NEXT: vpand %ymm3, %ymm1, %ymm1
; AVX2-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool.ll b/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool.ll
index 79d8e4acbba5a5..c27b5b289e1fa9 100644
--- a/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool.ll
+++ b/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool.ll
@@ -185,7 +185,6 @@ define <16 x i1> @bitcast_i16_16i1(i16 zeroext %a0) {
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: pcmpeqb %xmm1, %xmm0
-; SSE2-NEXT: psrlw $7, %xmm0
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT: retq
;
@@ -196,7 +195,6 @@ define <16 x i1> @bitcast_i16_16i1(i16 zeroext %a0) {
; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSSE3-NEXT: pand %xmm1, %xmm0
; SSSE3-NEXT: pcmpeqb %xmm1, %xmm0
-; SSSE3-NEXT: psrlw $7, %xmm0
; SSSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSSE3-NEXT: retq
;
@@ -208,7 +206,6 @@ define <16 x i1> @bitcast_i16_16i1(i16 zeroext %a0) {
; AVX1-NEXT: # xmm1 = mem[0,0]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsrlw $7, %xmm0, %xmm0
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: retq
;
@@ -219,7 +216,6 @@ define <16 x i1> @bitcast_i16_16i1(i16 zeroext %a0) {
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpsrlw $7, %xmm0, %xmm0
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: retq
;
@@ -252,13 +248,9 @@ define <32 x i1> @bitcast_i32_32i1(i32 %a0) {
; AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX1-NEXT: # xmm2 = mem[0,0]
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vpsrlw $7, %xmm1, %xmm1
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
-; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpsrlw $7, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: bitcast_i32_32i1:
@@ -269,7 +261,6 @@ define <32 x i1> @bitcast_i32_32i1(i32 %a0) {
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpsrlw $7, %ymm0, %ymm0
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/cmp-shiftX-maskX.ll b/llvm/test/CodeGen/X86/cmp-shiftX-maskX.ll
index 3887d9547fd06e..bc546fe857a3ed 100644
--- a/llvm/test/CodeGen/X86/cmp-shiftX-maskX.ll
+++ b/llvm/test/CodeGen/X86/cmp-shiftX-maskX.ll
@@ -623,28 +623,24 @@ define <16 x i8> @shl_s3_cmp_v16i8(<16 x i8> %x, <16 x i8> %y) {
; CHECK-NOBMI-LABEL: shl_s3_cmp_v16i8:
; CHECK-NOBMI: # %bb.0:
; CHECK-NOBMI-NEXT: pcmpeqb %xmm1, %xmm0
-; CHECK-NOBMI-NEXT: psllw $3, %xmm0
; CHECK-NOBMI-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NOBMI-NEXT: retq
;
; CHECK-BMI2-SSE2-LABEL: shl_s3_cmp_v16i8:
; CHECK-BMI2-SSE2: # %bb.0:
; CHECK-BMI2-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
-; CHECK-BMI2-SSE2-NEXT: psllw $3, %xmm0
; CHECK-BMI2-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-BMI2-SSE2-NEXT: retq
;
; CHECK-AVX12-LABEL: shl_s3_cmp_v16i8:
; CHECK-AVX12: # %bb.0:
; CHECK-AVX12-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
-; CHECK-AVX12-NEXT: vpsllw $3, %xmm0, %xmm0
; CHECK-AVX12-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX12-NEXT: retq
;
; CHECK-AVX512-LABEL: shl_s3_cmp_v16i8:
; CHECK-AVX512: # %bb.0:
; CHECK-AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
-; CHECK-AVX512-NEXT: vpsllw $3, %xmm0, %xmm0
; CHECK-AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-AVX512-NEXT: retq
%cmp = icmp eq <16 x i8> %x, %y
@@ -673,10 +669,7 @@ define <4 x i64> @shl_s31_cmp_v4f64(<4 x double> %x, <4 x double> %y) {
; CHECK-AVX1-LABEL: shl_s31_cmp_v4f64:
; CHECK-AVX1: # %bb.0:
; CHECK-AVX1-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0
-; CHECK-AVX1-NEXT: vpsllq $31, %xmm0, %xmm1
-; CHECK-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; CHECK-AVX1-NEXT: vpsllq $31, %xmm0, %xmm0
-; CHECK-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; CHECK-AVX1-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; CHECK-AVX1-NEXT: retq
;
; CHECK-AVX2-LABEL: shl_s31_cmp_v4f64:
@@ -700,28 +693,24 @@ define <16 x i8> @shr_s1_cmp_v16i8(<16 x i8> %x, <16 x i8> %y) {
; CHECK-NOBMI-LABEL: shr_s1_cmp_v16i8:
; CHECK-NOBMI: # %bb.0:
; CHECK-NOBMI-NEXT: pcmpeqb %xmm1, %xmm0
-; CHECK-NOBMI-NEXT: psrlw $1, %xmm0
; CHECK-NOBMI-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NOBMI-NEXT: retq
;
; CHECK-BMI2-SSE2-LABEL: shr_s1_cmp_v16i8:
; CHECK-BMI2-SSE2: # %bb.0:
; CHECK-BMI2-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
-; CHECK-BMI2-SSE2-NEXT: psrlw $1, %xmm0
; CHECK-BMI2-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-BMI2-SSE2-NEXT: retq
;
; CHECK-AVX12-LABEL: shr_s1_cmp_v16i8:
; CHECK-AVX12: # %bb.0:
; CHECK-AVX12-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
-; CHECK-AVX12-NEXT: vpsrlw $1, %xmm0, %xmm0
; CHECK-AVX12-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX12-NEXT: retq
;
; CHECK-AVX512-LABEL: shr_s1_cmp_v16i8:
; CHECK-AVX512: # %bb.0:
; CHECK-AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
-; CHECK-AVX512-NEXT: vpsrlw $1, %xmm0, %xmm0
; CHECK-AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-AVX512-NEXT: retq
%cmp = icmp eq <16 x i8> %x, %y
@@ -753,9 +742,8 @@ define <8 x i32> @shr_s9_cmp_v8i32(<8 x i32> %x, <8 x i32> %y) {
; CHECK-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; CHECK-AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2
; CHECK-AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
-; CHECK-AVX1-NEXT: vpsrld $9, %xmm0, %xmm0
-; CHECK-AVX1-NEXT: vpsrld $9, %xmm2, %xmm1
-; CHECK-AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; CHECK-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; CHECK-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; CHECK-AVX1-NEXT: retq
;
; CHECK-AVX2-LABEL: shr_s9_cmp_v8i32: