[llvm] r346102 - [X86] Add vector shift by immediate to SimplifyDemandedBitsForTargetNode.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sun Nov 4 09:31:28 PST 2018
Author: ctopper
Date: Sun Nov 4 09:31:27 2018
New Revision: 346102
URL: http://llvm.org/viewvc/llvm-project?rev=346102&view=rev
Log:
[X86] Add vector shift by immediate to SimplifyDemandedBitsForTargetNode.
Summary: This also enables some constant folding from KnownBits propagation. This helps some vXi64 cases in 32-bit mode where constant vectors appear as a vXi32 build vector plus a bitcast, which can prevent getNode from constant folding sra/shl/srl.
Reviewers: RKSimon, spatel
Reviewed By: spatel
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D54069
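
For reference, the core of the new handling is how demanded bits of a shift-by-immediate result translate into demanded bits of the shift's input. Below is a minimal standalone sketch of that arithmetic, assuming plain 64-bit masks in place of LLVM's APInt/KnownBits; ShiftKind, demandedInputBits, and countLeadingZeros here are hypothetical helpers written for illustration, not LLVM APIs.

#include <cstdint>
#include <cstdio>

enum ShiftKind { Shl, LogicalShr, ArithShr };

// Count leading zeros within the low BitWidth bits of V (returns BitWidth for V == 0).
static unsigned countLeadingZeros(uint64_t V, unsigned BitWidth) {
  unsigned Count = 0;
  for (unsigned Bit = BitWidth; Bit-- > 0;) {
    if (V & (1ULL << Bit))
      break;
    ++Count;
  }
  return Count;
}

// Given the bits demanded of a shift-by-immediate result, return the bits
// demanded of the shift's input operand.
uint64_t demandedInputBits(ShiftKind Kind, unsigned ShAmt,
                           uint64_t DemandedBits, unsigned BitWidth) {
  if (ShAmt >= BitWidth)
    return ~0ULL; // out-of-range immediate: give up and demand everything

  // Unlike APInt, a raw uint64_t does not truncate to BitWidth, so mask explicitly.
  uint64_t WidthMask = (BitWidth == 64) ? ~0ULL : ((1ULL << BitWidth) - 1);

  switch (Kind) {
  case Shl:
    // A left shift fills bit i of the result from bit i - ShAmt of the input.
    return DemandedBits >> ShAmt;
  case LogicalShr:
    // A logical right shift fills bit i of the result from bit i + ShAmt of the input.
    return (DemandedBits << ShAmt) & WidthMask;
  case ArithShr: {
    uint64_t Mask = (DemandedBits << ShAmt) & WidthMask;
    // If any demanded bit lies in the top ShAmt positions of the result, it is a
    // copy of the input's sign bit, so the sign bit is demanded as well.
    if (countLeadingZeros(DemandedBits, BitWidth) < ShAmt)
      Mask |= 1ULL << (BitWidth - 1);
    return Mask;
  }
  }
  return ~0ULL;
}

int main() {
  // Only the low 32 bits of a 64-bit lane are demanded after a logical shift
  // right by 32, so bits [63:32] of the shift's input are what actually matter.
  printf("%016llx\n", (unsigned long long)demandedInputBits(
                          LogicalShr, 32, 0xFFFFFFFFULL, 64));
  return 0;
}

The VSHLI/VSRLI/VSRAI cases added in the patch below follow this same mapping, with the extra VSRAI rule of also demanding the input sign bit whenever any of the top ShAmt result bits are demanded.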
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/combine-srl.ll
llvm/trunk/test/CodeGen/X86/combine-udiv.ll
llvm/trunk/test/CodeGen/X86/known-signbits-vector.ll
llvm/trunk/test/CodeGen/X86/pr35918.ll
llvm/trunk/test/CodeGen/X86/vector-shift-ashr-128.ll
llvm/trunk/test/CodeGen/X86/vector-shift-ashr-256.ll
llvm/trunk/test/CodeGen/X86/vector-trunc-usat.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=346102&r1=346101&r2=346102&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sun Nov 4 09:31:27 2018
@@ -31817,6 +31817,7 @@ bool X86TargetLowering::SimplifyDemanded
bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
SDValue Op, const APInt &OriginalDemandedBits, KnownBits &Known,
TargetLoweringOpt &TLO, unsigned Depth) const {
+ unsigned BitWidth = OriginalDemandedBits.getBitWidth();
unsigned Opc = Op.getOpcode();
switch(Opc) {
case X86ISD::PMULDQ:
@@ -31833,6 +31834,42 @@ bool X86TargetLowering::SimplifyDemanded
return true;
break;
}
+ case X86ISD::VSHLI: {
+ if (auto *ShiftImm = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ if (ShiftImm->getAPIntValue().uge(BitWidth))
+ break;
+
+ KnownBits KnownOp;
+ unsigned ShAmt = ShiftImm->getZExtValue();
+ APInt DemandedMask = OriginalDemandedBits.lshr(ShAmt);
+ if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask, KnownOp, TLO,
+ Depth + 1))
+ return true;
+ }
+ break;
+ }
+ case X86ISD::VSRAI:
+ case X86ISD::VSRLI: {
+ if (auto *ShiftImm = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ if (ShiftImm->getAPIntValue().uge(BitWidth))
+ break;
+
+ KnownBits KnownOp;
+ unsigned ShAmt = ShiftImm->getZExtValue();
+ APInt DemandedMask = OriginalDemandedBits << ShAmt;
+
+ // If any of the demanded bits are produced by the sign extension, we also
+ // demand the input sign bit.
+ if (Opc == X86ISD::VSRAI &&
+ OriginalDemandedBits.countLeadingZeros() < ShAmt)
+ DemandedMask.setSignBit();
+
+ if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask, KnownOp, TLO,
+ Depth + 1))
+ return true;
+ }
+ break;
+ }
}
return TargetLowering::SimplifyDemandedBitsForTargetNode(
@@ -34861,6 +34898,11 @@ static SDValue combineVectorShiftImm(SDN
return getConstVector(EltBits, UndefElts, VT.getSimpleVT(), DAG, SDLoc(N));
}
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLI.SimplifyDemandedBits(SDValue(N, 0),
+ APInt::getAllOnesValue(NumBitsPerElt), DCI))
+ return SDValue(N, 0);
+
return SDValue();
}
Modified: llvm/trunk/test/CodeGen/X86/combine-srl.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/combine-srl.ll?rev=346102&r1=346101&r2=346102&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/combine-srl.ll (original)
+++ llvm/trunk/test/CodeGen/X86/combine-srl.ll Sun Nov 4 09:31:27 2018
@@ -63,17 +63,7 @@ define <4 x i32> @combine_vec_lshr_known
define <4 x i32> @combine_vec_lshr_known_zero1(<4 x i32> %x) {
; SSE-LABEL: combine_vec_lshr_known_zero1:
; SSE: # %bb.0:
-; SSE-NEXT: pand {{.*}}(%rip), %xmm0
-; SSE-NEXT: movdqa %xmm0, %xmm1
-; SSE-NEXT: psrld $11, %xmm1
-; SSE-NEXT: movdqa %xmm0, %xmm2
-; SSE-NEXT: psrld $9, %xmm2
-; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
-; SSE-NEXT: movdqa %xmm0, %xmm1
-; SSE-NEXT: psrld $10, %xmm1
-; SSE-NEXT: psrld $8, %xmm0
-; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
-; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
+; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_lshr_known_zero1:
Modified: llvm/trunk/test/CodeGen/X86/combine-udiv.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/combine-udiv.ll?rev=346102&r1=346101&r2=346102&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/combine-udiv.ll (original)
+++ llvm/trunk/test/CodeGen/X86/combine-udiv.ll Sun Nov 4 09:31:27 2018
@@ -669,20 +669,15 @@ define <16 x i8> @combine_vec_udiv_nonun
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; SSE41-NEXT: pmullw %xmm0, %xmm2
; SSE41-NEXT: psrlw $8, %xmm2
-; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
-; SSE41-NEXT: pmovzxbw {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
-; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; SSE41-NEXT: pmullw %xmm3, %xmm0
-; SSE41-NEXT: psrlw $8, %xmm0
-; SSE41-NEXT: movdqa %xmm2, %xmm3
-; SSE41-NEXT: packuswb %xmm0, %xmm3
-; SSE41-NEXT: pmovzxbw {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
-; SSE41-NEXT: psllw $1, %xmm3
-; SSE41-NEXT: psllw $8, %xmm2
-; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm3[0],xmm2[1,2,3,4,5,6,7]
+; SSE41-NEXT: movdqa %xmm2, %xmm0
+; SSE41-NEXT: psllw $8, %xmm0
+; SSE41-NEXT: pxor %xmm3, %xmm3
+; SSE41-NEXT: packuswb %xmm3, %xmm2
+; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
+; SSE41-NEXT: psllw $1, %xmm2
+; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0],xmm0[1,2,3,4,5,6,7]
; SSE41-NEXT: psrlw $8, %xmm2
-; SSE41-NEXT: packuswb %xmm0, %xmm2
+; SSE41-NEXT: packuswb %xmm3, %xmm2
; SSE41-NEXT: movaps {{.*#+}} xmm0 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm2
; SSE41-NEXT: movdqa %xmm2, %xmm0
@@ -693,21 +688,16 @@ define <16 x i8> @combine_vec_udiv_nonun
; AVX1-NEXT: movl $171, %eax
; AVX1-NEXT: vmovd %eax, %xmm1
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX1-NEXT: vpmullw %xmm1, %xmm2, %xmm2
-; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
-; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
-; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; AVX1-NEXT: vpmullw %xmm1, %xmm2, %xmm1
+; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1
+; AVX1-NEXT: vpsllw $8, %xmm1, %xmm2
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpackuswb %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
-; AVX1-NEXT: vpmullw %xmm1, %xmm3, %xmm1
+; AVX1-NEXT: vpsllw $1, %xmm1, %xmm1
+; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7]
; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1
-; AVX1-NEXT: vpackuswb %xmm1, %xmm2, %xmm3
-; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
-; AVX1-NEXT: vpsllw $1, %xmm3, %xmm3
-; AVX1-NEXT: vpsllw $8, %xmm2, %xmm2
-; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0],xmm2[1,2,3,4,5,6,7]
-; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
-; AVX1-NEXT: vpackuswb %xmm1, %xmm2, %xmm1
+; AVX1-NEXT: vpackuswb %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX1-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
; AVX1-NEXT: retq
Modified: llvm/trunk/test/CodeGen/X86/known-signbits-vector.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/known-signbits-vector.ll?rev=346102&r1=346101&r2=346102&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/known-signbits-vector.ll (original)
+++ llvm/trunk/test/CodeGen/X86/known-signbits-vector.ll Sun Nov 4 09:31:27 2018
@@ -91,17 +91,14 @@ define float @signbits_ashr_extract_sito
; X32-LABEL: signbits_ashr_extract_sitofp_1:
; X32: # %bb.0:
; X32-NEXT: pushl %eax
-; X32-NEXT: vmovdqa {{.*#+}} xmm1 = [0,2147483648,0,2147483648]
-; X32-NEXT: vpsrlq $63, %xmm1, %xmm2
-; X32-NEXT: vpsrlq $32, %xmm1, %xmm1
-; X32-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
-; X32-NEXT: vpsrlq $63, %xmm0, %xmm2
+; X32-NEXT: vpsrlq $63, %xmm0, %xmm1
; X32-NEXT: vpsrlq $32, %xmm0, %xmm0
-; X32-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
+; X32-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; X32-NEXT: vmovdqa {{.*#+}} xmm1 = [0,32768,0,0,1,0,0,0]
; X32-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X32-NEXT: vpsubq %xmm1, %xmm0, %xmm0
; X32-NEXT: vmovd %xmm0, %eax
-; X32-NEXT: vcvtsi2ssl %eax, %xmm3, %xmm0
+; X32-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm0
; X32-NEXT: vmovss %xmm0, (%esp)
; X32-NEXT: flds (%esp)
; X32-NEXT: popl %eax
@@ -128,18 +125,15 @@ define float @signbits_ashr_shl_extract_
; X32-LABEL: signbits_ashr_shl_extract_sitofp:
; X32: # %bb.0:
; X32-NEXT: pushl %eax
-; X32-NEXT: vmovdqa {{.*#+}} xmm1 = [0,2147483648,0,2147483648]
-; X32-NEXT: vpsrlq $60, %xmm1, %xmm2
-; X32-NEXT: vpsrlq $61, %xmm1, %xmm1
-; X32-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
-; X32-NEXT: vpsrlq $60, %xmm0, %xmm2
+; X32-NEXT: vpsrlq $60, %xmm0, %xmm1
; X32-NEXT: vpsrlq $61, %xmm0, %xmm0
-; X32-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
+; X32-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; X32-NEXT: vmovdqa {{.*#+}} xmm1 = [4,0,0,0,8,0,0,0]
; X32-NEXT: vpxor %xmm1, %xmm0, %xmm0
; X32-NEXT: vpsubq %xmm1, %xmm0, %xmm0
; X32-NEXT: vpsllq $20, %xmm0, %xmm0
; X32-NEXT: vmovd %xmm0, %eax
-; X32-NEXT: vcvtsi2ssl %eax, %xmm3, %xmm0
+; X32-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm0
; X32-NEXT: vmovss %xmm0, (%esp)
; X32-NEXT: flds (%esp)
; X32-NEXT: popl %eax
@@ -263,13 +257,10 @@ define float @signbits_ashr_sext_sextinr
; X32: # %bb.0:
; X32-NEXT: pushl %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: vmovdqa {{.*#+}} xmm2 = [0,2147483648,0,2147483648]
-; X32-NEXT: vpsrlq $60, %xmm2, %xmm3
-; X32-NEXT: vpsrlq $61, %xmm2, %xmm2
-; X32-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm3[4,5,6,7]
-; X32-NEXT: vpsrlq $60, %xmm0, %xmm3
+; X32-NEXT: vpsrlq $60, %xmm0, %xmm2
; X32-NEXT: vpsrlq $61, %xmm0, %xmm0
-; X32-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
+; X32-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
+; X32-NEXT: vmovdqa {{.*#+}} xmm2 = [4,0,0,0,8,0,0,0]
; X32-NEXT: vpxor %xmm2, %xmm0, %xmm0
; X32-NEXT: vpsubq %xmm2, %xmm0, %xmm0
; X32-NEXT: vpinsrd $0, %eax, %xmm1, %xmm1
@@ -281,7 +272,7 @@ define float @signbits_ashr_sext_sextinr
; X32-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; X32-NEXT: vpand %xmm1, %xmm0, %xmm0
; X32-NEXT: vmovd %xmm0, %eax
-; X32-NEXT: vcvtsi2ssl %eax, %xmm4, %xmm0
+; X32-NEXT: vcvtsi2ssl %eax, %xmm3, %xmm0
; X32-NEXT: vmovss %xmm0, (%esp)
; X32-NEXT: flds (%esp)
; X32-NEXT: popl %eax
@@ -320,13 +311,10 @@ define float @signbits_ashr_sextvecinreg
; X32-LABEL: signbits_ashr_sextvecinreg_bitops_extract_sitofp:
; X32: # %bb.0:
; X32-NEXT: pushl %eax
-; X32-NEXT: vmovdqa {{.*#+}} xmm2 = [0,2147483648,0,2147483648]
-; X32-NEXT: vpsrlq $60, %xmm2, %xmm3
-; X32-NEXT: vpsrlq $61, %xmm2, %xmm2
-; X32-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm3[4,5,6,7]
-; X32-NEXT: vpsrlq $60, %xmm0, %xmm3
+; X32-NEXT: vpsrlq $60, %xmm0, %xmm2
; X32-NEXT: vpsrlq $61, %xmm0, %xmm0
-; X32-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
+; X32-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
+; X32-NEXT: vmovdqa {{.*#+}} xmm2 = [4,0,0,0,8,0,0,0]
; X32-NEXT: vpxor %xmm2, %xmm0, %xmm0
; X32-NEXT: vpsubq %xmm2, %xmm0, %xmm0
; X32-NEXT: vpmovsxdq %xmm1, %xmm1
@@ -334,7 +322,7 @@ define float @signbits_ashr_sextvecinreg
; X32-NEXT: vpor %xmm1, %xmm2, %xmm1
; X32-NEXT: vpxor %xmm0, %xmm1, %xmm0
; X32-NEXT: vmovd %xmm0, %eax
-; X32-NEXT: vcvtsi2ssl %eax, %xmm4, %xmm0
+; X32-NEXT: vcvtsi2ssl %eax, %xmm3, %xmm0
; X32-NEXT: vmovss %xmm0, (%esp)
; X32-NEXT: flds (%esp)
; X32-NEXT: popl %eax
@@ -375,22 +363,19 @@ define <4 x float> @signbits_ashr_sext_s
; X32-NEXT: subl $16, %esp
; X32-NEXT: vpmovsxdq 16(%ebp), %xmm3
; X32-NEXT: vpmovsxdq 8(%ebp), %xmm4
-; X32-NEXT: vmovdqa {{.*#+}} xmm5 = [0,2147483648,0,2147483648]
+; X32-NEXT: vextractf128 $1, %ymm2, %xmm5
; X32-NEXT: vpsrlq $63, %xmm5, %xmm6
; X32-NEXT: vpsrlq $33, %xmm5, %xmm5
; X32-NEXT: vpblendw {{.*#+}} xmm5 = xmm5[0,1,2,3],xmm6[4,5,6,7]
-; X32-NEXT: vextractf128 $1, %ymm2, %xmm6
-; X32-NEXT: vpsrlq $63, %xmm6, %xmm7
-; X32-NEXT: vpsrlq $33, %xmm6, %xmm6
-; X32-NEXT: vpblendw {{.*#+}} xmm6 = xmm6[0,1,2,3],xmm7[4,5,6,7]
-; X32-NEXT: vpxor %xmm5, %xmm6, %xmm6
-; X32-NEXT: vpsubq %xmm5, %xmm6, %xmm6
+; X32-NEXT: vmovdqa {{.*#+}} xmm6 = [0,16384,0,0,1,0,0,0]
+; X32-NEXT: vpxor %xmm6, %xmm5, %xmm5
+; X32-NEXT: vpsubq %xmm6, %xmm5, %xmm5
; X32-NEXT: vpsrlq $63, %xmm2, %xmm7
; X32-NEXT: vpsrlq $33, %xmm2, %xmm2
; X32-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm7[4,5,6,7]
-; X32-NEXT: vpxor %xmm5, %xmm2, %xmm2
-; X32-NEXT: vpsubq %xmm5, %xmm2, %xmm2
-; X32-NEXT: vinsertf128 $1, %xmm6, %ymm2, %ymm2
+; X32-NEXT: vpxor %xmm6, %xmm2, %xmm2
+; X32-NEXT: vpsubq %xmm6, %xmm2, %xmm2
+; X32-NEXT: vinsertf128 $1, %xmm5, %ymm2, %ymm2
; X32-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm3
; X32-NEXT: vextractf128 $1, %ymm1, %xmm4
; X32-NEXT: vextractf128 $1, %ymm0, %xmm5
Modified: llvm/trunk/test/CodeGen/X86/pr35918.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr35918.ll?rev=346102&r1=346101&r2=346102&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr35918.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pr35918.ll Sun Nov 4 09:31:27 2018
@@ -11,9 +11,9 @@ define void @fetch_r16g16_snorm_unorm8(<
; X86-SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-SKYLAKE-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-SKYLAKE-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; X86-SKYLAKE-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X86-SKYLAKE-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7]
; X86-SKYLAKE-NEXT: vpsrad $16, %xmm0, %xmm0
+; X86-SKYLAKE-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X86-SKYLAKE-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; X86-SKYLAKE-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
; X86-SKYLAKE-NEXT: vpsrld $7, %xmm0, %xmm0
@@ -29,7 +29,7 @@ define void @fetch_r16g16_snorm_unorm8(<
; X86-SKX-NEXT: subl $8, %esp
; X86-SKX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SKX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-SKX-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,xmm0[0,1],zero,zero,xmm0[2,3],zero,zero,xmm0[u,u],zero,zero,xmm0[u,u]
+; X86-SKX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u,u,0,1,u,u,2,3,u,u,u,u,u,u,u,u]
; X86-SKX-NEXT: vpsrad $16, %xmm0, %xmm0
; X86-SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X86-SKX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
@@ -50,9 +50,9 @@ define void @fetch_r16g16_snorm_unorm8(<
; X64-SKYLAKE-LABEL: fetch_r16g16_snorm_unorm8:
; X64-SKYLAKE: # %bb.0: # %entry
; X64-SKYLAKE-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-SKYLAKE-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; X64-SKYLAKE-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X64-SKYLAKE-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7]
; X64-SKYLAKE-NEXT: vpsrad $16, %xmm0, %xmm0
+; X64-SKYLAKE-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X64-SKYLAKE-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
; X64-SKYLAKE-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
; X64-SKYLAKE-NEXT: vpsrld $7, %xmm0, %xmm0
@@ -65,7 +65,7 @@ define void @fetch_r16g16_snorm_unorm8(<
; X64-SKX-LABEL: fetch_r16g16_snorm_unorm8:
; X64-SKX: # %bb.0: # %entry
; X64-SKX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-SKX-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,xmm0[0,1],zero,zero,xmm0[2,3],zero,zero,xmm0[u,u],zero,zero,xmm0[u,u]
+; X64-SKX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u,u,0,1,u,u,2,3,u,u,u,u,u,u,u,u]
; X64-SKX-NEXT: vpsrad $16, %xmm0, %xmm0
; X64-SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; X64-SKX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
Modified: llvm/trunk/test/CodeGen/X86/vector-shift-ashr-128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shift-ashr-128.ll?rev=346102&r1=346101&r2=346102&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shift-ashr-128.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shift-ashr-128.ll Sun Nov 4 09:31:27 2018
@@ -990,15 +990,11 @@ define <2 x i64> @constant_shift_v2i64(<
;
; X32-SSE-LABEL: constant_shift_v2i64:
; X32-SSE: # %bb.0:
-; X32-SSE-NEXT: movdqa {{.*#+}} xmm1 = [0,2147483648,0,2147483648]
-; X32-SSE-NEXT: movdqa %xmm1, %xmm2
-; X32-SSE-NEXT: psrlq $1, %xmm2
-; X32-SSE-NEXT: psrlq $7, %xmm1
-; X32-SSE-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
-; X32-SSE-NEXT: movdqa %xmm0, %xmm2
-; X32-SSE-NEXT: psrlq $1, %xmm2
+; X32-SSE-NEXT: movdqa %xmm0, %xmm1
+; X32-SSE-NEXT: psrlq $1, %xmm1
; X32-SSE-NEXT: psrlq $7, %xmm0
-; X32-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
+; X32-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; X32-SSE-NEXT: movapd {{.*#+}} xmm1 = [2.0E+0,7.2911220195563975E-304]
; X32-SSE-NEXT: xorpd %xmm1, %xmm0
; X32-SSE-NEXT: psubq %xmm1, %xmm0
; X32-SSE-NEXT: retl
Modified: llvm/trunk/test/CodeGen/X86/vector-shift-ashr-256.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shift-ashr-256.ll?rev=346102&r1=346101&r2=346102&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shift-ashr-256.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shift-ashr-256.ll Sun Nov 4 09:31:27 2018
@@ -1066,25 +1066,20 @@ define <4 x i64> @constant_shift_v4i64(<
;
; X32-AVX1-LABEL: constant_shift_v4i64:
; X32-AVX1: # %bb.0:
-; X32-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [0,2147483648,0,2147483648]
+; X32-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X32-AVX1-NEXT: vpsrlq $62, %xmm1, %xmm2
-; X32-AVX1-NEXT: vpsrlq $31, %xmm1, %xmm3
-; X32-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
-; X32-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
-; X32-AVX1-NEXT: vpsrlq $62, %xmm3, %xmm4
-; X32-AVX1-NEXT: vpsrlq $31, %xmm3, %xmm3
-; X32-AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm4[4,5,6,7]
-; X32-AVX1-NEXT: vpxor %xmm2, %xmm3, %xmm3
-; X32-AVX1-NEXT: vpsubq %xmm2, %xmm3, %xmm2
-; X32-AVX1-NEXT: vpsrlq $7, %xmm1, %xmm3
-; X32-AVX1-NEXT: vpsrlq $1, %xmm1, %xmm1
-; X32-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
-; X32-AVX1-NEXT: vpsrlq $7, %xmm0, %xmm3
+; X32-AVX1-NEXT: vpsrlq $31, %xmm1, %xmm1
+; X32-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
+; X32-AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,0,1,0,2,0,0,0]
+; X32-AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; X32-AVX1-NEXT: vpsubq %xmm2, %xmm1, %xmm1
+; X32-AVX1-NEXT: vpsrlq $7, %xmm0, %xmm2
; X32-AVX1-NEXT: vpsrlq $1, %xmm0, %xmm0
-; X32-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
-; X32-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; X32-AVX1-NEXT: vpsubq %xmm1, %xmm0, %xmm0
-; X32-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; X32-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
+; X32-AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,0,0,16384,0,0,0,256]
+; X32-AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; X32-AVX1-NEXT: vpsubq %xmm2, %xmm0, %xmm0
+; X32-AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X32-AVX1-NEXT: retl
;
; X32-AVX2-LABEL: constant_shift_v4i64:
Modified: llvm/trunk/test/CodeGen/X86/vector-trunc-usat.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-trunc-usat.ll?rev=346102&r1=346101&r2=346102&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-trunc-usat.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-trunc-usat.ll Sun Nov 4 09:31:27 2018
@@ -716,26 +716,26 @@ define <8 x i16> @trunc_usat_v8i64_v8i16
define <8 x i16> @trunc_usat_v8i32_v8i16(<8 x i32> %a0) {
; SSE2-LABEL: trunc_usat_v8i32_v8i16:
; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,65535]
-; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
-; SSE2-NEXT: movdqa %xmm0, %xmm4
-; SSE2-NEXT: pxor %xmm3, %xmm4
-; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [2147549183,2147549183,2147549183,2147549183]
-; SSE2-NEXT: movdqa %xmm5, %xmm6
-; SSE2-NEXT: pcmpgtd %xmm4, %xmm6
-; SSE2-NEXT: pand %xmm6, %xmm0
-; SSE2-NEXT: pandn %xmm2, %xmm6
-; SSE2-NEXT: por %xmm6, %xmm0
-; SSE2-NEXT: pxor %xmm1, %xmm3
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm0, %xmm3
+; SSE2-NEXT: pxor %xmm2, %xmm3
+; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147549183,2147549183,2147549183,2147549183]
+; SSE2-NEXT: movdqa %xmm4, %xmm5
; SSE2-NEXT: pcmpgtd %xmm3, %xmm5
-; SSE2-NEXT: pand %xmm5, %xmm1
-; SSE2-NEXT: pandn %xmm2, %xmm5
-; SSE2-NEXT: por %xmm1, %xmm5
-; SSE2-NEXT: pslld $16, %xmm5
-; SSE2-NEXT: psrad $16, %xmm5
+; SSE2-NEXT: pcmpeqd %xmm3, %xmm3
+; SSE2-NEXT: pand %xmm5, %xmm0
+; SSE2-NEXT: pxor %xmm3, %xmm5
+; SSE2-NEXT: por %xmm5, %xmm0
+; SSE2-NEXT: pxor %xmm1, %xmm2
+; SSE2-NEXT: pcmpgtd %xmm2, %xmm4
+; SSE2-NEXT: pxor %xmm4, %xmm3
+; SSE2-NEXT: pand %xmm1, %xmm4
+; SSE2-NEXT: por %xmm3, %xmm4
+; SSE2-NEXT: pslld $16, %xmm4
+; SSE2-NEXT: psrad $16, %xmm4
; SSE2-NEXT: pslld $16, %xmm0
; SSE2-NEXT: psrad $16, %xmm0
-; SSE2-NEXT: packssdw %xmm5, %xmm0
+; SSE2-NEXT: packssdw %xmm4, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: trunc_usat_v8i32_v8i16:
@@ -826,36 +826,36 @@ define <8 x i16> @trunc_usat_v8i32_v8i16
define <16 x i16> @trunc_usat_v16i32_v16i16(<16 x i32> %a0) {
; SSE2-LABEL: trunc_usat_v16i32_v16i16:
; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa %xmm1, %xmm4
-; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [65535,65535,65535,65535]
-; SSE2-NEXT: movdqa {{.*#+}} xmm7 = [2147483648,2147483648,2147483648,2147483648]
-; SSE2-NEXT: movdqa %xmm2, %xmm6
-; SSE2-NEXT: pxor %xmm7, %xmm6
+; SSE2-NEXT: movdqa %xmm1, %xmm8
+; SSE2-NEXT: movdqa {{.*#+}} xmm6 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: movdqa %xmm2, %xmm7
+; SSE2-NEXT: pxor %xmm6, %xmm7
; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [2147549183,2147549183,2147549183,2147549183]
; SSE2-NEXT: movdqa %xmm5, %xmm1
-; SSE2-NEXT: pcmpgtd %xmm6, %xmm1
+; SSE2-NEXT: pcmpgtd %xmm7, %xmm1
+; SSE2-NEXT: pcmpeqd %xmm7, %xmm7
; SSE2-NEXT: pand %xmm1, %xmm2
-; SSE2-NEXT: pandn %xmm8, %xmm1
+; SSE2-NEXT: pxor %xmm7, %xmm1
; SSE2-NEXT: por %xmm2, %xmm1
-; SSE2-NEXT: movdqa %xmm3, %xmm6
-; SSE2-NEXT: pxor %xmm7, %xmm6
+; SSE2-NEXT: movdqa %xmm3, %xmm4
+; SSE2-NEXT: pxor %xmm6, %xmm4
; SSE2-NEXT: movdqa %xmm5, %xmm2
-; SSE2-NEXT: pcmpgtd %xmm6, %xmm2
+; SSE2-NEXT: pcmpgtd %xmm4, %xmm2
; SSE2-NEXT: pand %xmm2, %xmm3
-; SSE2-NEXT: pandn %xmm8, %xmm2
+; SSE2-NEXT: pxor %xmm7, %xmm2
; SSE2-NEXT: por %xmm3, %xmm2
; SSE2-NEXT: movdqa %xmm0, %xmm3
-; SSE2-NEXT: pxor %xmm7, %xmm3
-; SSE2-NEXT: movdqa %xmm5, %xmm6
-; SSE2-NEXT: pcmpgtd %xmm3, %xmm6
-; SSE2-NEXT: pand %xmm6, %xmm0
-; SSE2-NEXT: pandn %xmm8, %xmm6
-; SSE2-NEXT: por %xmm6, %xmm0
-; SSE2-NEXT: pxor %xmm4, %xmm7
-; SSE2-NEXT: pcmpgtd %xmm7, %xmm5
-; SSE2-NEXT: pand %xmm5, %xmm4
-; SSE2-NEXT: pandn %xmm8, %xmm5
-; SSE2-NEXT: por %xmm4, %xmm5
+; SSE2-NEXT: pxor %xmm6, %xmm3
+; SSE2-NEXT: movdqa %xmm5, %xmm4
+; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
+; SSE2-NEXT: pand %xmm4, %xmm0
+; SSE2-NEXT: pxor %xmm7, %xmm4
+; SSE2-NEXT: por %xmm4, %xmm0
+; SSE2-NEXT: pxor %xmm8, %xmm6
+; SSE2-NEXT: pcmpgtd %xmm6, %xmm5
+; SSE2-NEXT: pxor %xmm5, %xmm7
+; SSE2-NEXT: pand %xmm8, %xmm5
+; SSE2-NEXT: por %xmm7, %xmm5
; SSE2-NEXT: pslld $16, %xmm5
; SSE2-NEXT: psrad $16, %xmm5
; SSE2-NEXT: pslld $16, %xmm0
@@ -870,36 +870,36 @@ define <16 x i16> @trunc_usat_v16i32_v16
;
; SSSE3-LABEL: trunc_usat_v16i32_v16i16:
; SSSE3: # %bb.0:
-; SSSE3-NEXT: movdqa %xmm1, %xmm4
-; SSSE3-NEXT: movdqa {{.*#+}} xmm8 = [65535,65535,65535,65535]
-; SSSE3-NEXT: movdqa {{.*#+}} xmm7 = [2147483648,2147483648,2147483648,2147483648]
-; SSSE3-NEXT: movdqa %xmm2, %xmm6
-; SSSE3-NEXT: pxor %xmm7, %xmm6
+; SSSE3-NEXT: movdqa %xmm1, %xmm8
+; SSSE3-NEXT: movdqa {{.*#+}} xmm6 = [2147483648,2147483648,2147483648,2147483648]
+; SSSE3-NEXT: movdqa %xmm2, %xmm7
+; SSSE3-NEXT: pxor %xmm6, %xmm7
; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [2147549183,2147549183,2147549183,2147549183]
; SSSE3-NEXT: movdqa %xmm5, %xmm1
-; SSSE3-NEXT: pcmpgtd %xmm6, %xmm1
+; SSSE3-NEXT: pcmpgtd %xmm7, %xmm1
+; SSSE3-NEXT: pcmpeqd %xmm7, %xmm7
; SSSE3-NEXT: pand %xmm1, %xmm2
-; SSSE3-NEXT: pandn %xmm8, %xmm1
+; SSSE3-NEXT: pxor %xmm7, %xmm1
; SSSE3-NEXT: por %xmm2, %xmm1
-; SSSE3-NEXT: movdqa %xmm3, %xmm6
-; SSSE3-NEXT: pxor %xmm7, %xmm6
+; SSSE3-NEXT: movdqa %xmm3, %xmm4
+; SSSE3-NEXT: pxor %xmm6, %xmm4
; SSSE3-NEXT: movdqa %xmm5, %xmm2
-; SSSE3-NEXT: pcmpgtd %xmm6, %xmm2
+; SSSE3-NEXT: pcmpgtd %xmm4, %xmm2
; SSSE3-NEXT: pand %xmm2, %xmm3
-; SSSE3-NEXT: pandn %xmm8, %xmm2
+; SSSE3-NEXT: pxor %xmm7, %xmm2
; SSSE3-NEXT: por %xmm3, %xmm2
; SSSE3-NEXT: movdqa %xmm0, %xmm3
-; SSSE3-NEXT: pxor %xmm7, %xmm3
-; SSSE3-NEXT: movdqa %xmm5, %xmm6
-; SSSE3-NEXT: pcmpgtd %xmm3, %xmm6
-; SSSE3-NEXT: pand %xmm6, %xmm0
-; SSSE3-NEXT: pandn %xmm8, %xmm6
-; SSSE3-NEXT: por %xmm6, %xmm0
-; SSSE3-NEXT: pxor %xmm4, %xmm7
-; SSSE3-NEXT: pcmpgtd %xmm7, %xmm5
-; SSSE3-NEXT: pand %xmm5, %xmm4
-; SSSE3-NEXT: pandn %xmm8, %xmm5
-; SSSE3-NEXT: por %xmm4, %xmm5
+; SSSE3-NEXT: pxor %xmm6, %xmm3
+; SSSE3-NEXT: movdqa %xmm5, %xmm4
+; SSSE3-NEXT: pcmpgtd %xmm3, %xmm4
+; SSSE3-NEXT: pand %xmm4, %xmm0
+; SSSE3-NEXT: pxor %xmm7, %xmm4
+; SSSE3-NEXT: por %xmm4, %xmm0
+; SSSE3-NEXT: pxor %xmm8, %xmm6
+; SSSE3-NEXT: pcmpgtd %xmm6, %xmm5
+; SSSE3-NEXT: pxor %xmm5, %xmm7
+; SSSE3-NEXT: pand %xmm8, %xmm5
+; SSSE3-NEXT: por %xmm7, %xmm5
; SSSE3-NEXT: pslld $16, %xmm5
; SSSE3-NEXT: psrad $16, %xmm5
; SSSE3-NEXT: pslld $16, %xmm0