[llvm] 5fd9451 - [X86][AVX512] lower1BitShuffle - fold broadcast(setcc(x,y)) -> setcc(broadcast(x),broadcast(y)) (PR52500)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 21 10:42:56 PDT 2022
Author: Simon Pilgrim
Date: 2022-03-21T17:42:49Z
New Revision: 5fd945166862377be390948c9ab7dab2f47df217
URL: https://github.com/llvm/llvm-project/commit/5fd945166862377be390948c9ab7dab2f47df217
DIFF: https://github.com/llvm/llvm-project/commit/5fd945166862377be390948c9ab7dab2f47df217.diff
LOG: [X86][AVX512] lower1BitShuffle - fold broadcast(setcc(x,y)) -> setcc(broadcast(x),broadcast(y)) (PR52500)
AVX512 has excellent broadcast ops for everything except vXi1 bool vectors, so if we're broadcasting a comparison result (a SETCC with a single use), try to broadcast the comparison operands instead and perform the comparison on the broadcasted operands.
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/vector-shuffle-v1.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index cbb718b0f0d75..991a70a499a76 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -18942,7 +18942,18 @@ static SDValue lower1BitShuffle(const SDLoc &DL, ArrayRef<int> Mask,
Offset += NumElts; // Increment for next iteration.
}
-
+ // If we're broadcasting a SETCC result, try to broadcast the ops instead.
+ // TODO: What other unary shuffles would benefit from this?
+ if (isBroadcastShuffleMask(Mask) && V1.getOpcode() == ISD::SETCC &&
+ V1->hasOneUse()) {
+ SDValue Op0 = V1.getOperand(0);
+ SDValue Op1 = V1.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(V1.getOperand(2))->get();
+ EVT OpVT = Op0.getValueType();
+ return DAG.getSetCC(
+ DL, VT, DAG.getVectorShuffle(OpVT, DL, Op0, DAG.getUNDEF(OpVT), Mask),
+ DAG.getVectorShuffle(OpVT, DL, Op1, DAG.getUNDEF(OpVT), Mask), CC);
+ }
MVT ExtVT;
switch (VT.SimpleTy) {
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-v1.ll b/llvm/test/CodeGen/X86/vector-shuffle-v1.ll
index 9af454b3cc984..cb36c8a6c2277 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-v1.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-v1.ll
@@ -919,10 +919,8 @@ define <16 x i1> @PR52500(<16 x i1> %msk, i32 %in) {
; AVX512F-NEXT: movl $789, %eax # imm = 0x315
; AVX512F-NEXT: vmovd %eax, %xmm1
; AVX512F-NEXT: vpmulld %xmm1, %xmm0, %xmm0
-; AVX512F-NEXT: vptestnmd %zmm0, %zmm0, %k2
-; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
; AVX512F-NEXT: vpbroadcastd %xmm0, %zmm0
-; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
+; AVX512F-NEXT: vptestnmd %zmm0, %zmm0, %k1 {%k1}
; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vzeroupper
@@ -937,10 +935,8 @@ define <16 x i1> @PR52500(<16 x i1> %msk, i32 %in) {
; AVX512VL-NEXT: movl $789, %eax # imm = 0x315
; AVX512VL-NEXT: vmovd %eax, %xmm1
; AVX512VL-NEXT: vpmulld %xmm1, %xmm0, %xmm0
-; AVX512VL-NEXT: vptestnmd %zmm0, %zmm0, %k2
-; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
; AVX512VL-NEXT: vpbroadcastd %xmm0, %zmm0
-; AVX512VL-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
+; AVX512VL-NEXT: vptestnmd %zmm0, %zmm0, %k1 {%k1}
; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512VL-NEXT: vzeroupper
@@ -949,16 +945,13 @@ define <16 x i1> @PR52500(<16 x i1> %msk, i32 %in) {
; VL_BW_DQ-LABEL: PR52500:
; VL_BW_DQ: # %bb.0:
; VL_BW_DQ-NEXT: vpsllw $7, %xmm0, %xmm0
-; VL_BW_DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; VL_BW_DQ-NEXT: vmovd %edi, %xmm2
+; VL_BW_DQ-NEXT: vpmovb2m %xmm0, %k1
+; VL_BW_DQ-NEXT: vmovd %edi, %xmm0
; VL_BW_DQ-NEXT: movl $789, %eax # imm = 0x315
-; VL_BW_DQ-NEXT: vmovd %eax, %xmm3
-; VL_BW_DQ-NEXT: vpmulld %xmm3, %xmm2, %xmm2
-; VL_BW_DQ-NEXT: vptestnmd %zmm2, %zmm2, %k0
-; VL_BW_DQ-NEXT: vpmovm2d %k0, %zmm2
-; VL_BW_DQ-NEXT: vpbroadcastd %xmm2, %zmm2
-; VL_BW_DQ-NEXT: vpmovd2m %zmm2, %k1
-; VL_BW_DQ-NEXT: vpcmpgtb %xmm0, %xmm1, %k0 {%k1}
+; VL_BW_DQ-NEXT: vmovd %eax, %xmm1
+; VL_BW_DQ-NEXT: vpmulld %xmm1, %xmm0, %xmm0
+; VL_BW_DQ-NEXT: vpbroadcastd %xmm0, %zmm0
+; VL_BW_DQ-NEXT: vptestnmd %zmm0, %zmm0, %k0 {%k1}
; VL_BW_DQ-NEXT: vpmovm2b %k0, %xmm0
; VL_BW_DQ-NEXT: vzeroupper
; VL_BW_DQ-NEXT: retq
More information about the llvm-commits mailing list