[llvm] 5fd9451 - [X86][AVX512] lower1BitShuffle - fold broadcast(setcc(x,y)) -> setcc(broadcast(x),broadcast(y)) (PR52500)

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 21 10:42:56 PDT 2022


Author: Simon Pilgrim
Date: 2022-03-21T17:42:49Z
New Revision: 5fd945166862377be390948c9ab7dab2f47df217

URL: https://github.com/llvm/llvm-project/commit/5fd945166862377be390948c9ab7dab2f47df217
DIFF: https://github.com/llvm/llvm-project/commit/5fd945166862377be390948c9ab7dab2f47df217.diff

LOG: [X86][AVX512] lower1BitShuffle - fold broadcast(setcc(x,y)) -> setcc(broadcast(x),broadcast(y)) (PR52500)

AVX512 has excellent broadcast ops for everything except vXi1 bool vectors, so if we're broadcasting a comparison result, try to broadcast the comparison operands instead and perform the comparison on the broadcasted values.

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/vector-shuffle-v1.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index cbb718b0f0d75..991a70a499a76 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -18942,7 +18942,18 @@ static SDValue lower1BitShuffle(const SDLoc &DL, ArrayRef<int> Mask,
     Offset += NumElts; // Increment for next iteration.
   }
 
-
+  // If we're broadcasting a SETCC result, try to broadcast the ops instead.
+  // TODO: What other unary shuffles would benefit from this?
+  if (isBroadcastShuffleMask(Mask) && V1.getOpcode() == ISD::SETCC &&
+      V1->hasOneUse()) {
+    SDValue Op0 = V1.getOperand(0);
+    SDValue Op1 = V1.getOperand(1);
+    ISD::CondCode CC = cast<CondCodeSDNode>(V1.getOperand(2))->get();
+    EVT OpVT = Op0.getValueType();
+    return DAG.getSetCC(
+        DL, VT, DAG.getVectorShuffle(OpVT, DL, Op0, DAG.getUNDEF(OpVT), Mask),
+        DAG.getVectorShuffle(OpVT, DL, Op1, DAG.getUNDEF(OpVT), Mask), CC);
+  }
 
   MVT ExtVT;
   switch (VT.SimpleTy) {

diff  --git a/llvm/test/CodeGen/X86/vector-shuffle-v1.ll b/llvm/test/CodeGen/X86/vector-shuffle-v1.ll
index 9af454b3cc984..cb36c8a6c2277 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-v1.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-v1.ll
@@ -919,10 +919,8 @@ define <16 x i1> @PR52500(<16 x i1> %msk, i32 %in) {
 ; AVX512F-NEXT:    movl $789, %eax # imm = 0x315
 ; AVX512F-NEXT:    vmovd %eax, %xmm1
 ; AVX512F-NEXT:    vpmulld %xmm1, %xmm0, %xmm0
-; AVX512F-NEXT:    vptestnmd %zmm0, %zmm0, %k2
-; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
 ; AVX512F-NEXT:    vpbroadcastd %xmm0, %zmm0
-; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1 {%k1}
+; AVX512F-NEXT:    vptestnmd %zmm0, %zmm0, %k1 {%k1}
 ; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
 ; AVX512F-NEXT:    vzeroupper
@@ -937,10 +935,8 @@ define <16 x i1> @PR52500(<16 x i1> %msk, i32 %in) {
 ; AVX512VL-NEXT:    movl $789, %eax # imm = 0x315
 ; AVX512VL-NEXT:    vmovd %eax, %xmm1
 ; AVX512VL-NEXT:    vpmulld %xmm1, %xmm0, %xmm0
-; AVX512VL-NEXT:    vptestnmd %zmm0, %zmm0, %k2
-; AVX512VL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
 ; AVX512VL-NEXT:    vpbroadcastd %xmm0, %zmm0
-; AVX512VL-NEXT:    vptestmd %zmm0, %zmm0, %k1 {%k1}
+; AVX512VL-NEXT:    vptestnmd %zmm0, %zmm0, %k1 {%k1}
 ; AVX512VL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512VL-NEXT:    vpmovdb %zmm0, %xmm0
 ; AVX512VL-NEXT:    vzeroupper
@@ -949,16 +945,13 @@ define <16 x i1> @PR52500(<16 x i1> %msk, i32 %in) {
 ; VL_BW_DQ-LABEL: PR52500:
 ; VL_BW_DQ:       # %bb.0:
 ; VL_BW_DQ-NEXT:    vpsllw $7, %xmm0, %xmm0
-; VL_BW_DQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; VL_BW_DQ-NEXT:    vmovd %edi, %xmm2
+; VL_BW_DQ-NEXT:    vpmovb2m %xmm0, %k1
+; VL_BW_DQ-NEXT:    vmovd %edi, %xmm0
 ; VL_BW_DQ-NEXT:    movl $789, %eax # imm = 0x315
-; VL_BW_DQ-NEXT:    vmovd %eax, %xmm3
-; VL_BW_DQ-NEXT:    vpmulld %xmm3, %xmm2, %xmm2
-; VL_BW_DQ-NEXT:    vptestnmd %zmm2, %zmm2, %k0
-; VL_BW_DQ-NEXT:    vpmovm2d %k0, %zmm2
-; VL_BW_DQ-NEXT:    vpbroadcastd %xmm2, %zmm2
-; VL_BW_DQ-NEXT:    vpmovd2m %zmm2, %k1
-; VL_BW_DQ-NEXT:    vpcmpgtb %xmm0, %xmm1, %k0 {%k1}
+; VL_BW_DQ-NEXT:    vmovd %eax, %xmm1
+; VL_BW_DQ-NEXT:    vpmulld %xmm1, %xmm0, %xmm0
+; VL_BW_DQ-NEXT:    vpbroadcastd %xmm0, %zmm0
+; VL_BW_DQ-NEXT:    vptestnmd %zmm0, %zmm0, %k0 {%k1}
 ; VL_BW_DQ-NEXT:    vpmovm2b %k0, %xmm0
 ; VL_BW_DQ-NEXT:    vzeroupper
 ; VL_BW_DQ-NEXT:    retq


        


More information about the llvm-commits mailing list