[llvm] 9103b73 - [X86] Fold MOVMSK(CONCAT(X,Y)) -> MOVMSK(AND/OR(X,Y)) for all_of/any_of patterns
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 27 10:29:05 PST 2022
Author: Simon Pilgrim
Date: 2022-01-27T18:28:09Z
New Revision: 9103b73fe05232fb233578117cd62b8b7d5f84c7
URL: https://github.com/llvm/llvm-project/commit/9103b73fe05232fb233578117cd62b8b7d5f84c7
DIFF: https://github.com/llvm/llvm-project/commit/9103b73fe05232fb233578117cd62b8b7d5f84c7.diff
LOG: [X86] Fold MOVMSK(CONCAT(X,Y)) -> MOVMSK(AND/OR(X,Y)) for all_of/any_of patterns
Makes it easier for later folds and avoids unnecessary 256-bit ops (especially on AVX1-only targets, where most 256-bit integer instructions are unavailable).
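As an aside on why the fold is sound: MOVMSK collects the per-element sign bits, and the sign bit of AND(X,Y)/OR(X,Y) is the AND/OR of the operands' sign bits, so testing the concatenated mask against zero / all-ones is equivalent to testing the half-width mask of OR(X,Y) / AND(X,Y). A minimal scalar C++ model of that identity follows (illustrative only - the movmsk helper models MOVMSKPS and is not part of the patch):

#include <array>
#include <cassert>
#include <cstdint>

// Collect the sign bit of each 32-bit lane into a bitmask (models MOVMSKPS).
static unsigned movmsk(const std::array<int32_t, 4> &V) {
  unsigned Mask = 0;
  for (unsigned I = 0; I != 4; ++I)
    Mask |= (static_cast<uint32_t>(V[I]) >> 31) << I;
  return Mask;
}

int main() {
  std::array<int32_t, 4> X = {-1, -1, 0, -1}, Y = {-1, 0, -1, -1};
  std::array<int32_t, 4> AndXY, OrXY;
  for (unsigned I = 0; I != 4; ++I) {
    AndXY[I] = X[I] & Y[I];
    OrXY[I] = X[I] | Y[I];
  }
  // MOVMSK(CONCAT(X,Y)) is movmsk(X) | (movmsk(Y) << 4).
  unsigned Concat = movmsk(X) | (movmsk(Y) << 4);
  // any_of: the 8-lane mask is nonzero iff the 4-lane mask of OR(X,Y) is.
  assert((Concat != 0) == (movmsk(OrXY) != 0));
  // all_of: the 8-lane mask is 0xFF iff the 4-lane mask of AND(X,Y) is 0xF.
  assert((Concat == 0xFF) == (movmsk(AndXY) == 0xF));
  return 0;
}

Both asserts hold for these sample inputs and, by the sign-bit argument above, for any inputs; this is the equivalence the new combineSetCCMOVMSK code relies on, with the half-width CmpMask (0 for any_of, low NumElts/2 bits set for all_of) replacing the full-width one.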
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/movmsk-cmp.ll
llvm/test/CodeGen/X86/vector-compare-all_of.ll
llvm/test/CodeGen/X86/vector-compare-any_of.ll
llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
llvm/test/CodeGen/X86/vector-reduce-or-bool.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 044e21dab4391..aff72452af6c7 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -44496,6 +44496,25 @@ static SDValue combineSetCCMOVMSK(SDValue EFLAGS, X86::CondCode &CC,
}
}
+ // MOVMSK(CONCAT(X,Y)) == 0 -> MOVMSK(OR(X,Y)).
+ // MOVMSK(CONCAT(X,Y)) != 0 -> MOVMSK(OR(X,Y)).
+ // MOVMSK(CONCAT(X,Y)) == -1 -> MOVMSK(AND(X,Y)).
+ // MOVMSK(CONCAT(X,Y)) != -1 -> MOVMSK(AND(X,Y)).
+ if (VecVT.is256BitVector()) {
+ SmallVector<SDValue> Ops;
+ if (collectConcatOps(peekThroughBitcasts(Vec).getNode(), Ops) &&
+ Ops.size() == 2) {
+ SDLoc DL(EFLAGS);
+ EVT SubVT = Ops[0].getValueType();
+ APInt CmpMask = APInt::getLowBitsSet(32, IsAnyOf ? 0 : NumElts / 2);
+ SDValue V = DAG.getNode(IsAnyOf ? ISD::OR : ISD::AND, DL, SubVT, Ops);
+ V = DAG.getBitcast(VecVT.getHalfNumVectorElementsVT(), V);
+ return DAG.getNode(X86ISD::CMP, DL, MVT::i32,
+ DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, V),
+ DAG.getConstant(CmpMask, DL, MVT::i32));
+ }
+ }
+
// MOVMSK(PCMPEQ(X,0)) == -1 -> PTESTZ(X,X).
// MOVMSK(PCMPEQ(X,0)) != -1 -> !PTESTZ(X,X).
// MOVMSK(PCMPEQ(X,Y)) == -1 -> PTESTZ(SUB(X,Y),SUB(X,Y)).
diff --git a/llvm/test/CodeGen/X86/movmsk-cmp.ll b/llvm/test/CodeGen/X86/movmsk-cmp.ll
index cb4af67fe0241..67ebbe979ee53 100644
--- a/llvm/test/CodeGen/X86/movmsk-cmp.ll
+++ b/llvm/test/CodeGen/X86/movmsk-cmp.ll
@@ -807,16 +807,13 @@ define i1 @allones_v8i64_sign(<8 x i64> %arg) {
;
; AVX1-LABEL: allones_v8i64_sign:
; AVX1: # %bb.0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
-; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vmovmskps %ymm0, %eax
-; AVX1-NEXT: cmpb $-1, %al
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vmovmskps %xmm0, %eax
+; AVX1-NEXT: cmpl $15, %eax
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -866,15 +863,12 @@ define i1 @allzeros_v8i64_sign(<8 x i64> %arg) {
;
; AVX1-LABEL: allzeros_v8i64_sign:
; AVX1: # %bb.0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
-; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vmovmskps %ymm0, %eax
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vmovmskps %xmm0, %eax
; AVX1-NEXT: testl %eax, %eax
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
@@ -1056,9 +1050,9 @@ define i1 @allzeros_v8i32_not(<8 x i32> %a0) {
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vmovmskps %ymm0, %eax
-; AVX1-NEXT: cmpb $-1, %al
+; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vmovmskps %xmm0, %eax
+; AVX1-NEXT: cmpl $15, %eax
; AVX1-NEXT: setne %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -1145,9 +1139,9 @@ define i1 @allzeros_v8i64_not(<8 x i64> %a0) {
; AVX1-NEXT: vpcmpeqq %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqq %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vmovmskps %ymm0, %eax
-; AVX1-NEXT: cmpb $-1, %al
+; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vmovmskps %xmm0, %eax
+; AVX1-NEXT: cmpl $15, %eax
; AVX1-NEXT: setne %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -1895,12 +1889,11 @@ define i1 @allones_v8i32_and1(<8 x i32> %arg) {
;
; AVX1-LABEL: allones_v8i32_and1:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpslld $31, %xmm0, %xmm1
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpslld $31, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT: vmovmskps %ymm0, %eax
-; AVX1-NEXT: cmpb $-1, %al
+; AVX1-NEXT: vmovmskps %xmm0, %eax
+; AVX1-NEXT: cmpl $15, %eax
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -1951,11 +1944,10 @@ define i1 @allzeros_v8i32_and1(<8 x i32> %arg) {
;
; AVX1-LABEL: allzeros_v8i32_and1:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpslld $31, %xmm0, %xmm1
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpslld $31, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT: vmovmskps %ymm0, %eax
+; AVX1-NEXT: vmovmskps %xmm0, %eax
; AVX1-NEXT: testl %eax, %eax
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
@@ -2210,12 +2202,11 @@ define i1 @allones_v4i64_and1(<4 x i64> %arg) {
;
; AVX1-LABEL: allones_v4i64_and1:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpsllq $63, %xmm0, %xmm1
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsllq $63, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT: vmovmskpd %ymm0, %eax
-; AVX1-NEXT: cmpb $15, %al
+; AVX1-NEXT: vmovmskpd %xmm0, %eax
+; AVX1-NEXT: cmpl $3, %eax
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -2267,11 +2258,10 @@ define i1 @allzeros_v4i64_and1(<4 x i64> %arg) {
;
; AVX1-LABEL: allzeros_v4i64_and1:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpsllq $63, %xmm0, %xmm1
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsllq $63, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT: vmovmskpd %ymm0, %eax
+; AVX1-NEXT: vmovmskpd %xmm0, %eax
; AVX1-NEXT: testl %eax, %eax
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
@@ -2328,20 +2318,17 @@ define i1 @allones_v8i64_and1(<8 x i64> %arg) {
;
; AVX1-LABEL: allones_v8i64_and1:
; AVX1: # %bb.0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vpsllq $63, %xmm2, %xmm2
-; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsllq $63, %xmm0, %xmm0
-; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
-; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpsllq $63, %xmm2, %xmm2
; AVX1-NEXT: vpsllq $63, %xmm1, %xmm1
; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vmovmskps %ymm0, %eax
-; AVX1-NEXT: cmpb $-1, %al
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpsllq $63, %xmm2, %xmm2
+; AVX1-NEXT: vpsllq $63, %xmm0, %xmm0
+; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vmovmskps %xmm0, %eax
+; AVX1-NEXT: cmpl $15, %eax
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -2397,19 +2384,16 @@ define i1 @allzeros_v8i64_and1(<8 x i64> %arg) {
;
; AVX1-LABEL: allzeros_v8i64_and1:
; AVX1: # %bb.0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vpsllq $63, %xmm2, %xmm2
-; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsllq $63, %xmm0, %xmm0
-; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
-; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpsllq $63, %xmm2, %xmm2
; AVX1-NEXT: vpsllq $63, %xmm1, %xmm1
; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vmovmskps %ymm0, %eax
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpsllq $63, %xmm2, %xmm2
+; AVX1-NEXT: vpsllq $63, %xmm0, %xmm0
+; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vmovmskps %xmm0, %eax
; AVX1-NEXT: testl %eax, %eax
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
@@ -3196,12 +3180,11 @@ define i1 @allones_v8i32_and4(<8 x i32> %arg) {
;
; AVX1-LABEL: allones_v8i32_and4:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpslld $29, %xmm0, %xmm1
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpslld $29, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT: vmovmskps %ymm0, %eax
-; AVX1-NEXT: cmpb $-1, %al
+; AVX1-NEXT: vmovmskps %xmm0, %eax
+; AVX1-NEXT: cmpl $15, %eax
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -3252,11 +3235,10 @@ define i1 @allzeros_v8i32_and4(<8 x i32> %arg) {
;
; AVX1-LABEL: allzeros_v8i32_and4:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpslld $29, %xmm0, %xmm1
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpslld $29, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT: vmovmskps %ymm0, %eax
+; AVX1-NEXT: vmovmskps %xmm0, %eax
; AVX1-NEXT: testl %eax, %eax
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
@@ -3511,12 +3493,11 @@ define i1 @allones_v4i64_and4(<4 x i64> %arg) {
;
; AVX1-LABEL: allones_v4i64_and4:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpsllq $61, %xmm0, %xmm1
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsllq $61, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT: vmovmskpd %ymm0, %eax
-; AVX1-NEXT: cmpb $15, %al
+; AVX1-NEXT: vmovmskpd %xmm0, %eax
+; AVX1-NEXT: cmpl $3, %eax
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -3568,11 +3549,10 @@ define i1 @allzeros_v4i64_and4(<4 x i64> %arg) {
;
; AVX1-LABEL: allzeros_v4i64_and4:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpsllq $61, %xmm0, %xmm1
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsllq $61, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT: vmovmskpd %ymm0, %eax
+; AVX1-NEXT: vmovmskpd %xmm0, %eax
; AVX1-NEXT: testl %eax, %eax
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
@@ -3629,20 +3609,17 @@ define i1 @allones_v8i64_and4(<8 x i64> %arg) {
;
; AVX1-LABEL: allones_v8i64_and4:
; AVX1: # %bb.0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vpsllq $61, %xmm2, %xmm2
-; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsllq $61, %xmm0, %xmm0
-; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
-; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpsllq $61, %xmm2, %xmm2
; AVX1-NEXT: vpsllq $61, %xmm1, %xmm1
; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vmovmskps %ymm0, %eax
-; AVX1-NEXT: cmpb $-1, %al
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpsllq $61, %xmm2, %xmm2
+; AVX1-NEXT: vpsllq $61, %xmm0, %xmm0
+; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vmovmskps %xmm0, %eax
+; AVX1-NEXT: cmpl $15, %eax
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -3698,19 +3675,16 @@ define i1 @allzeros_v8i64_and4(<8 x i64> %arg) {
;
; AVX1-LABEL: allzeros_v8i64_and4:
; AVX1: # %bb.0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vpsllq $61, %xmm2, %xmm2
-; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsllq $61, %xmm0, %xmm0
-; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
-; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpsllq $61, %xmm2, %xmm2
; AVX1-NEXT: vpsllq $61, %xmm1, %xmm1
; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vmovmskps %ymm0, %eax
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpsllq $61, %xmm2, %xmm2
+; AVX1-NEXT: vpsllq $61, %xmm0, %xmm0
+; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vmovmskps %xmm0, %eax
; AVX1-NEXT: testl %eax, %eax
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
diff --git a/llvm/test/CodeGen/X86/vector-compare-all_of.ll b/llvm/test/CodeGen/X86/vector-compare-all_of.ll
index 584b941681831..0d2331b08ede1 100644
--- a/llvm/test/CodeGen/X86/vector-compare-all_of.ll
+++ b/llvm/test/CodeGen/X86/vector-compare-all_of.ll
@@ -328,10 +328,10 @@ define i64 @test_v4i64_sext(<4 x i64> %a0, <4 x i64> %a1) {
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT: vmovmskpd %ymm0, %ecx
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vmovmskpd %xmm0, %ecx
; AVX1-NEXT: xorl %eax, %eax
-; AVX1-NEXT: cmpl $15, %ecx
+; AVX1-NEXT: cmpl $3, %ecx
; AVX1-NEXT: sete %al
; AVX1-NEXT: negq %rax
; AVX1-NEXT: vzeroupper
@@ -491,10 +491,10 @@ define i32 @test_v8i32_sext(<8 x i32> %a0, <8 x i32> %a1) {
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT: vmovmskps %ymm0, %ecx
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vmovmskps %xmm0, %ecx
; AVX1-NEXT: xorl %eax, %eax
-; AVX1-NEXT: cmpl $255, %ecx
+; AVX1-NEXT: cmpl $15, %ecx
; AVX1-NEXT: sete %al
; AVX1-NEXT: negl %eax
; AVX1-NEXT: vzeroupper
@@ -1184,9 +1184,9 @@ define i1 @bool_reduction_v4i64(<4 x i64> %x, <4 x i64> %y) {
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT: vmovmskpd %ymm0, %eax
-; AVX1-NEXT: cmpb $15, %al
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vmovmskpd %xmm0, %eax
+; AVX1-NEXT: cmpl $3, %eax
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -1239,9 +1239,9 @@ define i1 @bool_reduction_v8i32(<8 x i32> %x, <8 x i32> %y) {
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT: vmovmskps %ymm0, %eax
-; AVX1-NEXT: cmpb $-1, %al
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vmovmskps %xmm0, %eax
+; AVX1-NEXT: cmpl $15, %eax
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vector-compare-any_of.ll b/llvm/test/CodeGen/X86/vector-compare-any_of.ll
index 9af3d5e4969e8..e41ed53a5edbf 100644
--- a/llvm/test/CodeGen/X86/vector-compare-any_of.ll
+++ b/llvm/test/CodeGen/X86/vector-compare-any_of.ll
@@ -284,8 +284,8 @@ define i64 @test_v4i64_sext(<4 x i64> %a0, <4 x i64> %a1) {
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT: vmovmskpd %ymm0, %eax
+; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vmovmskpd %xmm0, %eax
; AVX1-NEXT: negl %eax
; AVX1-NEXT: sbbq %rax, %rax
; AVX1-NEXT: vzeroupper
@@ -425,8 +425,8 @@ define i32 @test_v8i32_sext(<8 x i32> %a0, <8 x i32> %a1) {
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT: vmovmskps %ymm0, %eax
+; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vmovmskps %xmm0, %eax
; AVX1-NEXT: negl %eax
; AVX1-NEXT: sbbl %eax, %eax
; AVX1-NEXT: vzeroupper
@@ -1075,8 +1075,8 @@ define i1 @bool_reduction_v4i64(<4 x i64> %x, <4 x i64> %y) {
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT: vmovmskpd %ymm0, %eax
+; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vmovmskpd %xmm0, %eax
; AVX1-NEXT: testl %eax, %eax
; AVX1-NEXT: setne %al
; AVX1-NEXT: vzeroupper
@@ -1129,8 +1129,8 @@ define i1 @bool_reduction_v8i32(<8 x i32> %x, <8 x i32> %y) {
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT: vmovmskps %ymm0, %eax
+; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vmovmskps %xmm0, %eax
; AVX1-NEXT: testl %eax, %eax
; AVX1-NEXT: setne %al
; AVX1-NEXT: vzeroupper
diff --git a/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll b/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
index c94ca6ca5deb3..d2011bed2d3a1 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
@@ -287,12 +287,11 @@ define i1 @trunc_v8i32_v8i1(<8 x i32>) {
;
; AVX1-LABEL: trunc_v8i32_v8i1:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpslld $31, %xmm0, %xmm1
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpslld $31, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT: vmovmskps %ymm0, %eax
-; AVX1-NEXT: cmpb $-1, %al
+; AVX1-NEXT: vmovmskps %xmm0, %eax
+; AVX1-NEXT: cmpl $15, %eax
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -525,11 +524,10 @@ define i1 @trunc_v8i64_v8i1(<8 x i64>) {
; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
+; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpslld $31, %xmm0, %xmm0
-; AVX1-NEXT: vpslld $31, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vmovmskps %ymm0, %eax
-; AVX1-NEXT: cmpb $-1, %al
+; AVX1-NEXT: vmovmskps %xmm0, %eax
+; AVX1-NEXT: cmpl $15, %eax
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -1071,9 +1069,9 @@ define i1 @icmp0_v4i64_v4i1(<4 x i64>) {
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vmovmskpd %ymm0, %eax
-; AVX1-NEXT: cmpb $15, %al
+; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vmovmskpd %xmm0, %eax
+; AVX1-NEXT: cmpl $3, %eax
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -1137,9 +1135,9 @@ define i1 @icmp0_v8i32_v8i1(<8 x i32>) {
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vmovmskps %ymm0, %eax
-; AVX1-NEXT: cmpb $-1, %al
+; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vmovmskps %xmm0, %eax
+; AVX1-NEXT: cmpl $15, %eax
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -1351,9 +1349,9 @@ define i1 @icmp0_v8i64_v8i1(<8 x i64>) {
; AVX1-NEXT: vpcmpeqq %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqq %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vmovmskps %ymm0, %eax
-; AVX1-NEXT: cmpb $-1, %al
+; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vmovmskps %xmm0, %eax
+; AVX1-NEXT: cmpl $15, %eax
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -1880,9 +1878,9 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>, <4 x i64>) {
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpcmpeqq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT: vmovmskpd %ymm0, %eax
-; AVX1-NEXT: cmpb $15, %al
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vmovmskpd %xmm0, %eax
+; AVX1-NEXT: cmpl $3, %eax
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -1948,9 +1946,9 @@ define i1 @icmp_v8i32_v8i1(<8 x i32>, <8 x i32>) {
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT: vmovmskps %ymm0, %eax
-; AVX1-NEXT: cmpb $-1, %al
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vmovmskps %xmm0, %eax
+; AVX1-NEXT: cmpl $15, %eax
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -2194,9 +2192,9 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>, <8 x i64>) {
; AVX1-NEXT: vpcmpeqq %xmm3, %xmm4, %xmm3
; AVX1-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpackssdw %xmm3, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vmovmskps %ymm0, %eax
-; AVX1-NEXT: cmpb $-1, %al
+; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vmovmskps %xmm0, %eax
+; AVX1-NEXT: cmpl $15, %eax
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vector-reduce-or-bool.ll b/llvm/test/CodeGen/X86/vector-reduce-or-bool.ll
index 6409a8ff4761f..2eb52a781b8be 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-or-bool.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-or-bool.ll
@@ -283,11 +283,10 @@ define i1 @trunc_v8i32_v8i1(<8 x i32>) {
;
; AVX1-LABEL: trunc_v8i32_v8i1:
; AVX1: # %bb.0:
-; AVX1-NEXT: vpslld $31, %xmm0, %xmm1
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpslld $31, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; AVX1-NEXT: vmovmskps %ymm0, %eax
+; AVX1-NEXT: vmovmskps %xmm0, %eax
; AVX1-NEXT: testl %eax, %eax
; AVX1-NEXT: setne %al
; AVX1-NEXT: vzeroupper
@@ -519,10 +518,9 @@ define i1 @trunc_v8i64_v8i1(<8 x i64>) {
; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
+; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpslld $31, %xmm0, %xmm0
-; AVX1-NEXT: vpslld $31, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vmovmskps %ymm0, %eax
+; AVX1-NEXT: vmovmskps %xmm0, %eax
; AVX1-NEXT: testl %eax, %eax
; AVX1-NEXT: setne %al
; AVX1-NEXT: vzeroupper
@@ -1070,8 +1068,8 @@ define i1 @icmp0_v4i64_v4i1(<4 x i64>) {
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vmovmskpd %ymm0, %eax
+; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vmovmskpd %xmm0, %eax
; AVX1-NEXT: testl %eax, %eax
; AVX1-NEXT: setne %al
; AVX1-NEXT: vzeroupper
@@ -1138,8 +1136,8 @@ define i1 @icmp0_v8i32_v8i1(<8 x i32>) {
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vmovmskps %ymm0, %eax
+; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vmovmskps %xmm0, %eax
; AVX1-NEXT: testl %eax, %eax
; AVX1-NEXT: setne %al
; AVX1-NEXT: vzeroupper
@@ -1382,8 +1380,8 @@ define i1 @icmp0_v8i64_v8i1(<8 x i64>) {
; AVX1-NEXT: vpcmpeqq %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqq %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vmovmskps %ymm0, %eax
+; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vmovmskps %xmm0, %eax
; AVX1-NEXT: testl %eax, %eax
; AVX1-NEXT: setne %al
; AVX1-NEXT: vzeroupper
@@ -1914,8 +1912,8 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>, <4 x i64>) {
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpcmpeqq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT: vmovmskpd %ymm0, %eax
+; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vmovmskpd %xmm0, %eax
; AVX1-NEXT: testl %eax, %eax
; AVX1-NEXT: setne %al
; AVX1-NEXT: vzeroupper
@@ -1982,8 +1980,8 @@ define i1 @icmp_v8i32_v8i1(<8 x i32>, <8 x i32>) {
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT: vmovmskps %ymm0, %eax
+; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vmovmskps %xmm0, %eax
; AVX1-NEXT: testl %eax, %eax
; AVX1-NEXT: setne %al
; AVX1-NEXT: vzeroupper
@@ -2222,8 +2220,8 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>, <8 x i64>) {
; AVX1-NEXT: vpcmpeqq %xmm3, %xmm4, %xmm3
; AVX1-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpackssdw %xmm3, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vmovmskps %ymm0, %eax
+; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vmovmskps %xmm0, %eax
; AVX1-NEXT: testl %eax, %eax
; AVX1-NEXT: setne %al
; AVX1-NEXT: vzeroupper