[llvm] 2167ae9 - [DAG] hoistLogicOpWithSameOpcodeHands - add support for *_EXTEND_VECTOR_INREG nodes.
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 19 02:50:36 PDT 2023
Author: Simon Pilgrim
Date: 2023-07-19T10:50:23+01:00
New Revision: 2167ae93c97b6d978ccdd8bc509d8f43aea62cca
URL: https://github.com/llvm/llvm-project/commit/2167ae93c97b6d978ccdd8bc509d8f43aea62cca
DIFF: https://github.com/llvm/llvm-project/commit/2167ae93c97b6d978ccdd8bc509d8f43aea62cca.diff
LOG: [DAG] hoistLogicOpWithSameOpcodeHands - add support for *_EXTEND_VECTOR_INREG nodes.
This can reuse the existing *_EXTEND node handling.
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/test/CodeGen/X86/bitcast-and-setcc-128.ll
llvm/test/CodeGen/X86/icmp-abs-C-vec.ll
llvm/test/CodeGen/X86/vector-ext-logic.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 9f222268fc1277..9945e1d4494300 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -5704,8 +5704,7 @@ SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
SDValue Y = N1.getOperand(0);
EVT XVT = X.getValueType();
SDLoc DL(N);
- if (HandOpcode == ISD::ANY_EXTEND || HandOpcode == ISD::ZERO_EXTEND ||
- HandOpcode == ISD::SIGN_EXTEND) {
+ if (ISD::isExtOpcode(HandOpcode) || ISD::isExtVecInRegOpcode(HandOpcode)) {
// If both operands have other uses, this transform would create extra
// instructions without eliminating anything.
if (!N0.hasOneUse() && !N1.hasOneUse())
@@ -5720,8 +5719,9 @@ SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
return SDValue();
// Avoid infinite looping with PromoteIntBinOp.
// TODO: Should we apply desirable/legal constraints to all opcodes?
- if (HandOpcode == ISD::ANY_EXTEND && LegalTypes &&
- !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
+ if ((HandOpcode == ISD::ANY_EXTEND ||
+ HandOpcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
+ LegalTypes && !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
return SDValue();
// logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
diff --git a/llvm/test/CodeGen/X86/bitcast-and-setcc-128.ll b/llvm/test/CodeGen/X86/bitcast-and-setcc-128.ll
index 3e64af62169638..d0653bcfb29b61 100644
--- a/llvm/test/CodeGen/X86/bitcast-and-setcc-128.ll
+++ b/llvm/test/CodeGen/X86/bitcast-and-setcc-128.ll
@@ -181,26 +181,21 @@ define i2 @v2i8(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d) {
; SSE2-LABEL: v2i8:
; SSE2: # %bb.0:
; SSE2-NEXT: pcmpgtb %xmm1, %xmm0
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,1,4,5,6,7]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT: pcmpgtb %xmm3, %xmm2
+; SSE2-NEXT: pand %xmm0, %xmm2
; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm2[0,0,1,1,4,5,6,7]
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
-; SSE2-NEXT: pand %xmm0, %xmm1
-; SSE2-NEXT: movmskpd %xmm1, %eax
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm2[0,0,1,1,4,5,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; SSE2-NEXT: movmskpd %xmm0, %eax
; SSE2-NEXT: # kill: def $al killed $al killed $eax
; SSE2-NEXT: retq
;
; SSSE3-LABEL: v2i8:
; SSSE3: # %bb.0:
; SSSE3-NEXT: pcmpgtb %xmm1, %xmm0
-; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
-; SSSE3-NEXT: pshufb %xmm1, %xmm0
; SSSE3-NEXT: pcmpgtb %xmm3, %xmm2
-; SSSE3-NEXT: pshufb %xmm1, %xmm2
; SSSE3-NEXT: pand %xmm0, %xmm2
+; SSSE3-NEXT: pshufb {{.*#+}} xmm2 = xmm2[u,u,u,u,u,u,u,0,u,u,u,u,u,u,u,1]
; SSSE3-NEXT: movmskpd %xmm2, %eax
; SSSE3-NEXT: # kill: def $al killed $al killed $eax
; SSSE3-NEXT: retq
@@ -208,10 +203,9 @@ define i2 @v2i8(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d) {
; AVX12-LABEL: v2i8:
; AVX12: # %bb.0:
; AVX12-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
-; AVX12-NEXT: vpmovsxbq %xmm0, %xmm0
; AVX12-NEXT: vpcmpgtb %xmm3, %xmm2, %xmm1
-; AVX12-NEXT: vpmovsxbq %xmm1, %xmm1
; AVX12-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX12-NEXT: vpmovsxbq %xmm0, %xmm0
; AVX12-NEXT: vmovmskpd %xmm0, %eax
; AVX12-NEXT: # kill: def $al killed $al killed $eax
; AVX12-NEXT: retq
@@ -249,23 +243,20 @@ define i2 @v2i16(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c, <2 x i16> %d) {
; SSE2-SSSE3-LABEL: v2i16:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: pcmpgtw %xmm1, %xmm0
-; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,1,4,5,6,7]
-; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-SSSE3-NEXT: pcmpgtw %xmm3, %xmm2
-; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm2[0,0,1,1,4,5,6,7]
-; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
-; SSE2-SSSE3-NEXT: pand %xmm0, %xmm1
-; SSE2-SSSE3-NEXT: movmskpd %xmm1, %eax
+; SSE2-SSSE3-NEXT: pand %xmm0, %xmm2
+; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm2[0,0,1,1,4,5,6,7]
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; SSE2-SSSE3-NEXT: movmskpd %xmm0, %eax
; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT: retq
;
; AVX12-LABEL: v2i16:
; AVX12: # %bb.0:
; AVX12-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
-; AVX12-NEXT: vpmovsxwq %xmm0, %xmm0
; AVX12-NEXT: vpcmpgtw %xmm3, %xmm2, %xmm1
-; AVX12-NEXT: vpmovsxwq %xmm1, %xmm1
; AVX12-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX12-NEXT: vpmovsxwq %xmm0, %xmm0
; AVX12-NEXT: vmovmskpd %xmm0, %eax
; AVX12-NEXT: # kill: def $al killed $al killed $eax
; AVX12-NEXT: retq
@@ -303,21 +294,19 @@ define i2 @v2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x i32> %d) {
; SSE2-SSSE3-LABEL: v2i32:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
-; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-SSSE3-NEXT: pcmpgtd %xmm3, %xmm2
-; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,0,1,1]
-; SSE2-SSSE3-NEXT: pand %xmm0, %xmm1
-; SSE2-SSSE3-NEXT: movmskpd %xmm1, %eax
+; SSE2-SSSE3-NEXT: pand %xmm0, %xmm2
+; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,0,1,1]
+; SSE2-SSSE3-NEXT: movmskpd %xmm0, %eax
; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT: retq
;
; AVX12-LABEL: v2i32:
; AVX12: # %bb.0:
; AVX12-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
-; AVX12-NEXT: vpmovsxdq %xmm0, %xmm0
; AVX12-NEXT: vpcmpgtd %xmm3, %xmm2, %xmm1
-; AVX12-NEXT: vpmovsxdq %xmm1, %xmm1
; AVX12-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX12-NEXT: vpmovsxdq %xmm0, %xmm0
; AVX12-NEXT: vmovmskpd %xmm0, %eax
; AVX12-NEXT: # kill: def $al killed $al killed $eax
; AVX12-NEXT: retq
@@ -447,12 +436,10 @@ define i4 @v4i8(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x i8> %d) {
; SSE2-SSSE3-LABEL: v4i8:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: pcmpgtb %xmm1, %xmm0
-; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-SSSE3-NEXT: pcmpgtb %xmm3, %xmm2
+; SSE2-SSSE3-NEXT: pand %xmm0, %xmm2
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3]
-; SSE2-SSSE3-NEXT: pand %xmm0, %xmm2
; SSE2-SSSE3-NEXT: movmskps %xmm2, %eax
; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT: retq
@@ -460,10 +447,9 @@ define i4 @v4i8(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x i8> %d) {
; AVX12-LABEL: v4i8:
; AVX12: # %bb.0:
; AVX12-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
-; AVX12-NEXT: vpmovsxbd %xmm0, %xmm0
; AVX12-NEXT: vpcmpgtb %xmm3, %xmm2, %xmm1
-; AVX12-NEXT: vpmovsxbd %xmm1, %xmm1
; AVX12-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX12-NEXT: vpmovsxbd %xmm0, %xmm0
; AVX12-NEXT: vmovmskps %xmm0, %eax
; AVX12-NEXT: # kill: def $al killed $al killed $eax
; AVX12-NEXT: retq
@@ -501,10 +487,9 @@ define i4 @v4i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16> %d) {
; SSE2-SSSE3-LABEL: v4i16:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: pcmpgtw %xmm1, %xmm0
-; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-SSSE3-NEXT: pcmpgtw %xmm3, %xmm2
-; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3]
; SSE2-SSSE3-NEXT: pand %xmm0, %xmm2
+; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3]
; SSE2-SSSE3-NEXT: movmskps %xmm2, %eax
; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT: retq
@@ -512,10 +497,9 @@ define i4 @v4i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16> %d) {
; AVX12-LABEL: v4i16:
; AVX12: # %bb.0:
; AVX12-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
-; AVX12-NEXT: vpmovsxwd %xmm0, %xmm0
; AVX12-NEXT: vpcmpgtw %xmm3, %xmm2, %xmm1
-; AVX12-NEXT: vpmovsxwd %xmm1, %xmm1
; AVX12-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX12-NEXT: vpmovsxwd %xmm0, %xmm0
; AVX12-NEXT: vmovmskps %xmm0, %eax
; AVX12-NEXT: # kill: def $al killed $al killed $eax
; AVX12-NEXT: retq
@@ -553,10 +537,9 @@ define i8 @v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d) {
; SSE2-SSSE3-LABEL: v8i8:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: pcmpgtb %xmm1, %xmm0
-; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-SSSE3-NEXT: pcmpgtb %xmm3, %xmm2
-; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-SSSE3-NEXT: pand %xmm0, %xmm2
+; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-SSSE3-NEXT: packsswb %xmm2, %xmm2
; SSE2-SSSE3-NEXT: pmovmskb %xmm2, %eax
; SSE2-SSSE3-NEXT: # kill: def $al killed $al killed $eax
@@ -565,10 +548,9 @@ define i8 @v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d) {
; AVX12-LABEL: v8i8:
; AVX12: # %bb.0:
; AVX12-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
-; AVX12-NEXT: vpmovsxbw %xmm0, %xmm0
; AVX12-NEXT: vpcmpgtb %xmm3, %xmm2, %xmm1
-; AVX12-NEXT: vpmovsxbw %xmm1, %xmm1
; AVX12-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX12-NEXT: vpmovsxbw %xmm0, %xmm0
; AVX12-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX12-NEXT: vpmovmskb %xmm0, %eax
; AVX12-NEXT: # kill: def $al killed $al killed $eax
diff --git a/llvm/test/CodeGen/X86/icmp-abs-C-vec.ll b/llvm/test/CodeGen/X86/icmp-abs-C-vec.ll
index 0db3e47ce89745..92d4830452a8d1 100644
--- a/llvm/test/CodeGen/X86/icmp-abs-C-vec.ll
+++ b/llvm/test/CodeGen/X86/icmp-abs-C-vec.ll
@@ -939,30 +939,27 @@ define <4 x i1> @eq_or_to_abs_vec4x16(<4 x i16> %x) {
; AVX2-LABEL: eq_or_to_abs_vec4x16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
-; AVX2-NEXT: vpmovsxwd %xmm1, %xmm1
; AVX2-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX2-NEXT: vpmovsxwd %xmm0, %xmm0
; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: vpmovsxwd %xmm0, %xmm0
; AVX2-NEXT: retq
;
; SSE41-LABEL: eq_or_to_abs_vec4x16:
; SSE41: # %bb.0:
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = <88,88,88,88,u,u,u,u>
; SSE41-NEXT: pcmpeqw %xmm0, %xmm1
-; SSE41-NEXT: pmovsxwd %xmm1, %xmm1
; SSE41-NEXT: pcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE41-NEXT: pmovsxwd %xmm0, %xmm0
; SSE41-NEXT: por %xmm1, %xmm0
+; SSE41-NEXT: pmovsxwd %xmm0, %xmm0
; SSE41-NEXT: retq
;
; SSE2-LABEL: eq_or_to_abs_vec4x16:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = <88,88,88,88,u,u,u,u>
; SSE2-NEXT: pcmpeqw %xmm0, %xmm1
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
; SSE2-NEXT: pcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT: por %xmm1, %xmm0
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT: retq
%cmp1 = icmp eq <4 x i16> %x, <i16 88, i16 88, i16 88, i16 88>
%cmp2 = icmp eq <4 x i16> %x, <i16 -88, i16 -88, i16 -88, i16 -88>
@@ -1035,12 +1032,10 @@ define <4 x i1> @ne_and_to_abs_vec4x8(<4 x i8> %x) {
; AVX2: # %bb.0:
; AVX2-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
-; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm1
-; AVX2-NEXT: vpmovsxbd %xmm1, %xmm1
; AVX2-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpmovsxbd %xmm0, %xmm0
-; AVX2-NEXT: vpand %xmm0, %xmm1, %xmm0
; AVX2-NEXT: retq
;
; SSE41-LABEL: ne_and_to_abs_vec4x8:
@@ -1048,26 +1043,22 @@ define <4 x i1> @ne_and_to_abs_vec4x8(<4 x i8> %x) {
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = <88,88,88,88,u,u,u,u,u,u,u,u,u,u,u,u>
; SSE41-NEXT: pcmpeqb %xmm0, %xmm1
; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
-; SSE41-NEXT: pxor %xmm2, %xmm1
-; SSE41-NEXT: pmovsxbd %xmm1, %xmm1
; SSE41-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: pxor %xmm2, %xmm0
; SSE41-NEXT: pmovsxbd %xmm0, %xmm0
-; SSE41-NEXT: pand %xmm1, %xmm0
; SSE41-NEXT: retq
;
; SSE2-LABEL: ne_and_to_abs_vec4x8:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = <88,88,88,88,u,u,u,u,u,u,u,u,u,u,u,u>
; SSE2-NEXT: pcmpeqb %xmm0, %xmm1
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
-; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
; SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; SSE2-NEXT: por %xmm1, %xmm0
-; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
+; SSE2-NEXT: pxor %xmm1, %xmm0
; SSE2-NEXT: retq
%cmp1 = icmp ne <4 x i8> %x, <i8 88, i8 88, i8 88, i8 88>
%cmp2 = icmp ne <4 x i8> %x, <i8 -88, i8 -88, i8 -88, i8 -88>
diff --git a/llvm/test/CodeGen/X86/vector-ext-logic.ll b/llvm/test/CodeGen/X86/vector-ext-logic.ll
index cfbc83d5e84e6f..722f8c7b20363a 100644
--- a/llvm/test/CodeGen/X86/vector-ext-logic.ll
+++ b/llvm/test/CodeGen/X86/vector-ext-logic.ll
@@ -155,17 +155,15 @@ define <8 x i16> @zext_and_v8i16(<8 x i8> %x, <8 x i8> %y) {
define <8 x i16> @zext_or_v8i16(<8 x i8> %x, <8 x i8> %y) {
; SSE2-LABEL: zext_or_v8i16:
; SSE2: # %bb.0:
-; SSE2-NEXT: pxor %xmm2, %xmm2
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT: por %xmm1, %xmm0
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT: retq
;
; AVX2-LABEL: zext_or_v8i16:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: retq
%xz = zext <8 x i8> %x to <8 x i16>
%yz = zext <8 x i8> %y to <8 x i16>
@@ -176,17 +174,15 @@ define <8 x i16> @zext_or_v8i16(<8 x i8> %x, <8 x i8> %y) {
define <8 x i16> @zext_xor_v8i16(<8 x i8> %x, <8 x i8> %y) {
; SSE2-LABEL: zext_xor_v8i16:
; SSE2: # %bb.0:
-; SSE2-NEXT: pxor %xmm2, %xmm2
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT: retq
;
; AVX2-LABEL: zext_xor_v8i16:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: retq
%xz = zext <8 x i8> %x to <8 x i16>
%yz = zext <8 x i8> %y to <8 x i16>
@@ -197,17 +193,15 @@ define <8 x i16> @zext_xor_v8i16(<8 x i8> %x, <8 x i8> %y) {
define <8 x i16> @sext_and_v8i16(<8 x i8> %x, <8 x i8> %y) {
; SSE2-LABEL: sext_and_v8i16:
; SSE2: # %bb.0:
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; SSE2-NEXT: pand %xmm2, %xmm0
+; SSE2-NEXT: pand %xmm1, %xmm0
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: psraw $8, %xmm0
; SSE2-NEXT: retq
;
; AVX2-LABEL: sext_and_v8i16:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpmovsxbw %xmm0, %xmm0
-; AVX2-NEXT: vpmovsxbw %xmm1, %xmm1
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpmovsxbw %xmm0, %xmm0
; AVX2-NEXT: retq
%xs = sext <8 x i8> %x to <8 x i16>
%ys = sext <8 x i8> %y to <8 x i16>
@@ -218,17 +212,15 @@ define <8 x i16> @sext_and_v8i16(<8 x i8> %x, <8 x i8> %y) {
define <8 x i16> @sext_or_v8i16(<8 x i8> %x, <8 x i8> %y) {
; SSE2-LABEL: sext_or_v8i16:
; SSE2: # %bb.0:
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; SSE2-NEXT: por %xmm2, %xmm0
+; SSE2-NEXT: por %xmm1, %xmm0
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: psraw $8, %xmm0
; SSE2-NEXT: retq
;
; AVX2-LABEL: sext_or_v8i16:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpmovsxbw %xmm0, %xmm0
-; AVX2-NEXT: vpmovsxbw %xmm1, %xmm1
; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpmovsxbw %xmm0, %xmm0
; AVX2-NEXT: retq
%xs = sext <8 x i8> %x to <8 x i16>
%ys = sext <8 x i8> %y to <8 x i16>
@@ -239,17 +231,15 @@ define <8 x i16> @sext_or_v8i16(<8 x i8> %x, <8 x i8> %y) {
define <8 x i16> @sext_xor_v8i16(<8 x i8> %x, <8 x i8> %y) {
; SSE2-LABEL: sext_xor_v8i16:
; SSE2: # %bb.0:
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: psraw $8, %xmm0
; SSE2-NEXT: retq
;
; AVX2-LABEL: sext_xor_v8i16:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpmovsxbw %xmm0, %xmm0
-; AVX2-NEXT: vpmovsxbw %xmm1, %xmm1
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpmovsxbw %xmm0, %xmm0
; AVX2-NEXT: retq
%xs = sext <8 x i8> %x to <8 x i16>
%ys = sext <8 x i8> %y to <8 x i16>
More information about the llvm-commits mailing list