[llvm] 4ebd307 - [X86][AVX] lowerBuildVectorAsBroadcast - improve BROADCASTM lowering on non-VLX targets

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 18 11:54:00 PDT 2020


Author: Simon Pilgrim
Date: 2020-09-18T19:52:02+01:00
New Revision: 4ebd30722af5175282b99938d163ad4459aa5968

URL: https://github.com/llvm/llvm-project/commit/4ebd30722af5175282b99938d163ad4459aa5968
DIFF: https://github.com/llvm/llvm-project/commit/4ebd30722af5175282b99938d163ad4459aa5968.diff

LOG: [X86][AVX] lowerBuildVectorAsBroadcast - improve BROADCASTM lowering on non-VLX targets

Broadcast to a ZMM type then extract the low subvector.

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/broadcastm-lowering.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 948197d246e6..5ec814ba61c1 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -8649,7 +8649,7 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp,
   // b. t1 = (build_vector t0 t0)
   //
   // Create (VBROADCASTM v2i1 X)
-  if (Subtarget.hasCDI() && (VT.is512BitVector() || Subtarget.hasVLX())) {
+  if (Subtarget.hasCDI()) {
     MVT EltType = VT.getScalarType();
     unsigned NumElts = VT.getVectorNumElements();
     SDValue BOperand;
@@ -8666,12 +8666,17 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp,
       else
         BOperand = Ld.getOperand(0).getOperand(0);
       MVT MaskVT = BOperand.getSimpleValueType();
-      if ((EltType == MVT::i64 && MaskVT == MVT::v8i1) || // for broadcastmb2q
+      if ((EltType == MVT::i64 && MaskVT == MVT::v8i1) ||  // for broadcastmb2q
           (EltType == MVT::i32 && MaskVT == MVT::v16i1)) { // for broadcastmw2d
-        SDValue Brdcst =
-            DAG.getNode(X86ISD::VBROADCASTM, dl,
-                        MVT::getVectorVT(EltType, NumElts), BOperand);
-        return DAG.getBitcast(VT, Brdcst);
+        MVT BcstVT = MVT::getVectorVT(EltType, NumElts);
+        if (!VT.is512BitVector() && !Subtarget.hasVLX()) {
+          unsigned Scale = 512 / VT.getSizeInBits();
+          BcstVT = MVT::getVectorVT(EltType, NumElts * Scale);
+        }
+        SDValue Bcst = DAG.getNode(X86ISD::VBROADCASTM, dl, BcstVT, BOperand);
+        if (BcstVT.getSizeInBits() != VT.getSizeInBits())
+          Bcst = extractSubVector(Bcst, 0, DAG, dl, VT.getSizeInBits());
+        return DAG.getBitcast(VT, Bcst);
       }
     }
   }

diff  --git a/llvm/test/CodeGen/X86/broadcastm-lowering.ll b/llvm/test/CodeGen/X86/broadcastm-lowering.ll
index bc1ac484169e..5f34ea190fa2 100644
--- a/llvm/test/CodeGen/X86/broadcastm-lowering.ll
+++ b/llvm/test/CodeGen/X86/broadcastm-lowering.ll
@@ -7,53 +7,25 @@
 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl,avx512cd,+avx512bw,+fast-variable-shuffle | FileCheck %s --check-prefixes=ALL,AVX512VLCDBW,X86-AVX512VLCDBW
 
 define <2 x i64> @test_mm_epi64(<8 x i16> %a, <8 x i16> %b) {
-; X64-AVX512CD-LABEL: test_mm_epi64:
-; X64-AVX512CD:       # %bb.0: # %entry
-; X64-AVX512CD-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
-; X64-AVX512CD-NEXT:    vpmovsxwq %xmm0, %zmm0
-; X64-AVX512CD-NEXT:    vptestmq %zmm0, %zmm0, %k0
-; X64-AVX512CD-NEXT:    kmovw %k0, %eax
-; X64-AVX512CD-NEXT:    movzbl %al, %eax
-; X64-AVX512CD-NEXT:    vmovq %rax, %xmm0
-; X64-AVX512CD-NEXT:    vpbroadcastq %xmm0, %xmm0
-; X64-AVX512CD-NEXT:    vzeroupper
-; X64-AVX512CD-NEXT:    retq
-;
-; X86-AVX512CD-LABEL: test_mm_epi64:
-; X86-AVX512CD:       # %bb.0: # %entry
-; X86-AVX512CD-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
-; X86-AVX512CD-NEXT:    vpmovsxwq %xmm0, %zmm0
-; X86-AVX512CD-NEXT:    vptestmq %zmm0, %zmm0, %k0
-; X86-AVX512CD-NEXT:    kmovw %k0, %eax
-; X86-AVX512CD-NEXT:    movzbl %al, %eax
-; X86-AVX512CD-NEXT:    vmovd %eax, %xmm0
-; X86-AVX512CD-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3],zero,zero,zero,zero,xmm0[0,1,2,3],zero,zero,zero,zero
-; X86-AVX512CD-NEXT:    vzeroupper
-; X86-AVX512CD-NEXT:    retl
-;
-; X64-AVX512CDBW-LABEL: test_mm_epi64:
-; X64-AVX512CDBW:       # %bb.0: # %entry
-; X64-AVX512CDBW-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
-; X64-AVX512CDBW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; X64-AVX512CDBW-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0
-; X64-AVX512CDBW-NEXT:    kmovd %k0, %eax
-; X64-AVX512CDBW-NEXT:    movzbl %al, %eax
-; X64-AVX512CDBW-NEXT:    vmovq %rax, %xmm0
-; X64-AVX512CDBW-NEXT:    vpbroadcastq %xmm0, %xmm0
-; X64-AVX512CDBW-NEXT:    vzeroupper
-; X64-AVX512CDBW-NEXT:    retq
+; AVX512CD-LABEL: test_mm_epi64:
+; AVX512CD:       # %bb.0: # %entry
+; AVX512CD-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
+; AVX512CD-NEXT:    vpmovsxwq %xmm0, %zmm0
+; AVX512CD-NEXT:    vptestmq %zmm0, %zmm0, %k0
+; AVX512CD-NEXT:    vpbroadcastmb2q %k0, %zmm0
+; AVX512CD-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512CD-NEXT:    vzeroupper
+; AVX512CD-NEXT:    ret{{[l|q]}}
 ;
-; X86-AVX512CDBW-LABEL: test_mm_epi64:
-; X86-AVX512CDBW:       # %bb.0: # %entry
-; X86-AVX512CDBW-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
-; X86-AVX512CDBW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; X86-AVX512CDBW-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0
-; X86-AVX512CDBW-NEXT:    kmovd %k0, %eax
-; X86-AVX512CDBW-NEXT:    movzbl %al, %eax
-; X86-AVX512CDBW-NEXT:    vmovd %eax, %xmm0
-; X86-AVX512CDBW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3],zero,zero,zero,zero,xmm0[0,1,2,3],zero,zero,zero,zero
-; X86-AVX512CDBW-NEXT:    vzeroupper
-; X86-AVX512CDBW-NEXT:    retl
+; AVX512CDBW-LABEL: test_mm_epi64:
+; AVX512CDBW:       # %bb.0: # %entry
+; AVX512CDBW-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512CDBW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512CDBW-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0
+; AVX512CDBW-NEXT:    vpbroadcastmb2q %k0, %zmm0
+; AVX512CDBW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512CDBW-NEXT:    vzeroupper
+; AVX512CDBW-NEXT:    ret{{[l|q]}}
 ;
 ; AVX512VLCDBW-LABEL: test_mm_epi64:
 ; AVX512VLCDBW:       # %bb.0: # %entry
@@ -83,9 +55,8 @@ define <4 x i32> @test_mm_epi32(<16 x i8> %a, <16 x i8> %b) {
 ; AVX512CDBW-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; AVX512CDBW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
 ; AVX512CDBW-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0
-; AVX512CDBW-NEXT:    kmovw %k0, %eax
-; AVX512CDBW-NEXT:    vmovd %eax, %xmm0
-; AVX512CDBW-NEXT:    vpbroadcastd %xmm0, %xmm0
+; AVX512CDBW-NEXT:    vpbroadcastmw2d %k0, %zmm0
+; AVX512CDBW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512CDBW-NEXT:    vzeroupper
 ; AVX512CDBW-NEXT:    ret{{[l|q]}}
 ;
@@ -150,49 +121,23 @@ entry:
 }
 
 define <4 x i64> @test_mm256_epi64(<8 x i32> %a, <8 x i32> %b) {
-; X64-AVX512CD-LABEL: test_mm256_epi64:
-; X64-AVX512CD:       # %bb.0: # %entry
-; X64-AVX512CD-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
-; X64-AVX512CD-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; X64-AVX512CD-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
-; X64-AVX512CD-NEXT:    kmovw %k0, %eax
-; X64-AVX512CD-NEXT:    movzbl %al, %eax
-; X64-AVX512CD-NEXT:    vmovq %rax, %xmm0
-; X64-AVX512CD-NEXT:    vpbroadcastq %xmm0, %ymm0
-; X64-AVX512CD-NEXT:    retq
-;
-; X86-AVX512CD-LABEL: test_mm256_epi64:
-; X86-AVX512CD:       # %bb.0: # %entry
-; X86-AVX512CD-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
-; X86-AVX512CD-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; X86-AVX512CD-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
-; X86-AVX512CD-NEXT:    kmovw %k0, %eax
-; X86-AVX512CD-NEXT:    movzbl %al, %eax
-; X86-AVX512CD-NEXT:    vmovd %eax, %xmm0
-; X86-AVX512CD-NEXT:    vpbroadcastq %xmm0, %ymm0
-; X86-AVX512CD-NEXT:    retl
-;
-; X64-AVX512CDBW-LABEL: test_mm256_epi64:
-; X64-AVX512CDBW:       # %bb.0: # %entry
-; X64-AVX512CDBW-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
-; X64-AVX512CDBW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; X64-AVX512CDBW-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
-; X64-AVX512CDBW-NEXT:    kmovd %k0, %eax
-; X64-AVX512CDBW-NEXT:    movzbl %al, %eax
-; X64-AVX512CDBW-NEXT:    vmovq %rax, %xmm0
-; X64-AVX512CDBW-NEXT:    vpbroadcastq %xmm0, %ymm0
-; X64-AVX512CDBW-NEXT:    retq
+; AVX512CD-LABEL: test_mm256_epi64:
+; AVX512CD:       # %bb.0: # %entry
+; AVX512CD-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512CD-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512CD-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
+; AVX512CD-NEXT:    vpbroadcastmb2q %k0, %zmm0
+; AVX512CD-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
+; AVX512CD-NEXT:    ret{{[l|q]}}
 ;
-; X86-AVX512CDBW-LABEL: test_mm256_epi64:
-; X86-AVX512CDBW:       # %bb.0: # %entry
-; X86-AVX512CDBW-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
-; X86-AVX512CDBW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
-; X86-AVX512CDBW-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
-; X86-AVX512CDBW-NEXT:    kmovd %k0, %eax
-; X86-AVX512CDBW-NEXT:    movzbl %al, %eax
-; X86-AVX512CDBW-NEXT:    vmovd %eax, %xmm0
-; X86-AVX512CDBW-NEXT:    vpbroadcastq %xmm0, %ymm0
-; X86-AVX512CDBW-NEXT:    retl
+; AVX512CDBW-LABEL: test_mm256_epi64:
+; AVX512CDBW:       # %bb.0: # %entry
+; AVX512CDBW-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512CDBW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512CDBW-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
+; AVX512CDBW-NEXT:    vpbroadcastmb2q %k0, %zmm0
+; AVX512CDBW-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
+; AVX512CDBW-NEXT:    ret{{[l|q]}}
 ;
 ; AVX512VLCDBW-LABEL: test_mm256_epi64:
 ; AVX512VLCDBW:       # %bb.0: # %entry
@@ -214,9 +159,8 @@ define <8 x i32> @test_mm256_epi32(<16 x i16> %a, <16 x i16> %b) {
 ; AVX512CD-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
 ; AVX512CD-NEXT:    vpmovsxwd %ymm0, %zmm0
 ; AVX512CD-NEXT:    vptestmd %zmm0, %zmm0, %k0
-; AVX512CD-NEXT:    kmovw %k0, %eax
-; AVX512CD-NEXT:    vmovd %eax, %xmm0
-; AVX512CD-NEXT:    vpbroadcastd %xmm0, %ymm0
+; AVX512CD-NEXT:    vpbroadcastmw2d %k0, %zmm0
+; AVX512CD-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512CD-NEXT:    ret{{[l|q]}}
 ;
 ; AVX512CDBW-LABEL: test_mm256_epi32:
@@ -224,9 +168,8 @@ define <8 x i32> @test_mm256_epi32(<16 x i16> %a, <16 x i16> %b) {
 ; AVX512CDBW-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
 ; AVX512CDBW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
 ; AVX512CDBW-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0
-; AVX512CDBW-NEXT:    kmovw %k0, %eax
-; AVX512CDBW-NEXT:    vmovd %eax, %xmm0
-; AVX512CDBW-NEXT:    vpbroadcastd %xmm0, %ymm0
+; AVX512CDBW-NEXT:    vpbroadcastmw2d %k0, %zmm0
+; AVX512CDBW-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512CDBW-NEXT:    ret{{[l|q]}}
 ;
 ; AVX512VLCDBW-LABEL: test_mm256_epi32:


        


More information about the llvm-commits mailing list