[llvm] d9ffd32 - [X86] combineCMP - attempt to simplify KSHIFTR mask element extractions when just comparing against zero (REAPPLIED)

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Sat Sep 2 09:48:44 PDT 2023


Author: Simon Pilgrim
Date: 2023-09-02T17:45:17+01:00
New Revision: d9ffd3219e3817c1818b14eb1c3d5e274d9e3df7

URL: https://github.com/llvm/llvm-project/commit/d9ffd3219e3817c1818b14eb1c3d5e274d9e3df7
DIFF: https://github.com/llvm/llvm-project/commit/d9ffd3219e3817c1818b14eb1c3d5e274d9e3df7.diff

LOG: [X86] combineCMP - attempt to simplify KSHIFTR mask element extractions when just comparing against zero (REAPPLIED)

We can just bitcast the pre-shifted mask as an integer and use TEST/BT directly.

Reapplied with fix for 239ab16ec121 which didn't set the comparison type correctly

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/avx512-insert-extract.ll
    llvm/test/CodeGen/X86/movmsk-cmp.ll
    llvm/test/CodeGen/X86/pr33349.ll
    llvm/test/CodeGen/X86/pr34177.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 520d8ed74a817e..7a9ee45c6beed4 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -53473,6 +53473,7 @@ static SDValue combineCMP(SDNode *N, SelectionDAG &DAG,
   SDLoc dl(N);
   SDValue Op = N->getOperand(0);
   EVT VT = Op.getValueType();
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
 
   // If we have a constant logical shift that's only used in a comparison
   // against zero turn it into an equivalent AND. This allows turning it into
@@ -53496,12 +53497,41 @@ static SDValue combineCMP(SDNode *N, SelectionDAG &DAG,
     }
   }
 
+  // If we're extracting from a avx512 bool vector and comparing against zero,
+  // then try to just bitcast the vector to an integer to use TEST/BT directly.
+  // (and (extract_elt (kshiftr vXi1, C), 0), 1) -> (and (bc vXi1), 1<<C)
+  if (Op.getOpcode() == ISD::AND && isOneConstant(Op.getOperand(1)) &&
+      Op.hasOneUse() && onlyZeroFlagUsed(SDValue(N, 0))) {
+    SDValue Src = Op.getOperand(0);
+    if (Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+        isNullConstant(Src.getOperand(1)) &&
+        Src.getOperand(0).getValueType().getScalarType() == MVT::i1) {
+      SDValue BoolVec = Src.getOperand(0);
+      unsigned ShAmt = 0;
+      if (BoolVec.getOpcode() == X86ISD::KSHIFTR) {
+        ShAmt = BoolVec.getConstantOperandVal(1);
+        BoolVec = BoolVec.getOperand(0);
+      }
+      BoolVec = widenMaskVector(BoolVec, false, Subtarget, DAG, dl);
+      EVT VecVT = BoolVec.getValueType();
+      unsigned BitWidth = VecVT.getVectorNumElements();
+      EVT BCVT = EVT::getIntegerVT(*DAG.getContext(), BitWidth);
+      if (TLI.isTypeLegal(VecVT) && TLI.isTypeLegal(BCVT)) {
+        APInt Mask = APInt::getOneBitSet(BitWidth, ShAmt);
+        Op = DAG.getBitcast(BCVT, BoolVec);
+        Op = DAG.getNode(ISD::AND, dl, BCVT, Op,
+                         DAG.getConstant(Mask, dl, BCVT));
+        return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
+                           DAG.getConstant(0, dl, BCVT));
+      }
+    }
+  }
+
   // Peek through any zero-extend if we're only testing for a zero result.
   if (Op.getOpcode() == ISD::ZERO_EXTEND && onlyZeroFlagUsed(SDValue(N, 0))) {
     SDValue Src = Op.getOperand(0);
     EVT SrcVT = Src.getValueType();
-    if (SrcVT.getScalarSizeInBits() >= 8 &&
-        DAG.getTargetLoweringInfo().isTypeLegal(SrcVT))
+    if (SrcVT.getScalarSizeInBits() >= 8 && TLI.isTypeLegal(SrcVT))
       return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Src,
                          DAG.getConstant(0, dl, SrcVT));
   }

diff  --git a/llvm/test/CodeGen/X86/avx512-insert-extract.ll b/llvm/test/CodeGen/X86/avx512-insert-extract.ll
index 5728c14df14dd9..40cbbfe2c14e5d 100644
--- a/llvm/test/CodeGen/X86/avx512-insert-extract.ll
+++ b/llvm/test/CodeGen/X86/avx512-insert-extract.ll
@@ -175,9 +175,8 @@ define <16 x i32> @test11(<16 x i32>%a, <16 x i32>%b) nounwind {
 ; KNL-LABEL: test11:
 ; KNL:       ## %bb.0:
 ; KNL-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
-; KNL-NEXT:    kshiftrw $4, %k0, %k0
 ; KNL-NEXT:    kmovw %k0, %eax
-; KNL-NEXT:    testb $1, %al
+; KNL-NEXT:    testb $16, %al
 ; KNL-NEXT:    je LBB10_2
 ; KNL-NEXT:  ## %bb.1: ## %A
 ; KNL-NEXT:    vmovdqa64 %zmm1, %zmm0
@@ -189,9 +188,8 @@ define <16 x i32> @test11(<16 x i32>%a, <16 x i32>%b) nounwind {
 ; SKX-LABEL: test11:
 ; SKX:       ## %bb.0:
 ; SKX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
-; SKX-NEXT:    kshiftrw $4, %k0, %k0
 ; SKX-NEXT:    kmovd %k0, %eax
-; SKX-NEXT:    testb $1, %al
+; SKX-NEXT:    testb $16, %al
 ; SKX-NEXT:    je LBB10_2
 ; SKX-NEXT:  ## %bb.1: ## %A
 ; SKX-NEXT:    vmovdqa64 %zmm1, %zmm0
@@ -276,9 +274,8 @@ define i64 @test14(<8 x i64>%a, <8 x i64>%b, i64 %a1, i64 %b1) nounwind {
 ; KNL:       ## %bb.0:
 ; KNL-NEXT:    movq %rdi, %rax
 ; KNL-NEXT:    vpcmpgtq %zmm0, %zmm1, %k0
-; KNL-NEXT:    kshiftrw $4, %k0, %k0
 ; KNL-NEXT:    kmovw %k0, %ecx
-; KNL-NEXT:    testb $1, %cl
+; KNL-NEXT:    testb $16, %cl
 ; KNL-NEXT:    cmoveq %rsi, %rax
 ; KNL-NEXT:    vzeroupper
 ; KNL-NEXT:    retq
@@ -287,9 +284,8 @@ define i64 @test14(<8 x i64>%a, <8 x i64>%b, i64 %a1, i64 %b1) nounwind {
 ; SKX:       ## %bb.0:
 ; SKX-NEXT:    movq %rdi, %rax
 ; SKX-NEXT:    vpcmpgtq %zmm0, %zmm1, %k0
-; SKX-NEXT:    kshiftrb $4, %k0, %k0
 ; SKX-NEXT:    kmovd %k0, %ecx
-; SKX-NEXT:    testb $1, %cl
+; SKX-NEXT:    testb $16, %cl
 ; SKX-NEXT:    cmoveq %rsi, %rax
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq

diff  --git a/llvm/test/CodeGen/X86/movmsk-cmp.ll b/llvm/test/CodeGen/X86/movmsk-cmp.ll
index 88d819d6106ea9..a0901e265f5ae9 100644
--- a/llvm/test/CodeGen/X86/movmsk-cmp.ll
+++ b/llvm/test/CodeGen/X86/movmsk-cmp.ll
@@ -4350,10 +4350,8 @@ define i32 @PR39665_c_ray(<2 x double> %x, <2 x double> %y) {
 ; KNL-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
 ; KNL-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
 ; KNL-NEXT:    vcmpltpd %zmm0, %zmm1, %k0
-; KNL-NEXT:    kshiftrw $1, %k0, %k1
-; KNL-NEXT:    kmovw %k1, %eax
 ; KNL-NEXT:    kmovw %k0, %ecx
-; KNL-NEXT:    testb $1, %al
+; KNL-NEXT:    testb $2, %cl
 ; KNL-NEXT:    movl $42, %eax
 ; KNL-NEXT:    movl $99, %edx
 ; KNL-NEXT:    cmovel %edx, %eax
@@ -4365,10 +4363,8 @@ define i32 @PR39665_c_ray(<2 x double> %x, <2 x double> %y) {
 ; SKX-LABEL: PR39665_c_ray:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vcmpltpd %xmm0, %xmm1, %k0
-; SKX-NEXT:    kshiftrb $1, %k0, %k1
-; SKX-NEXT:    kmovd %k1, %eax
 ; SKX-NEXT:    kmovd %k0, %ecx
-; SKX-NEXT:    testb $1, %al
+; SKX-NEXT:    testb $2, %cl
 ; SKX-NEXT:    movl $42, %eax
 ; SKX-NEXT:    movl $99, %edx
 ; SKX-NEXT:    cmovel %edx, %eax

diff  --git a/llvm/test/CodeGen/X86/pr33349.ll b/llvm/test/CodeGen/X86/pr33349.ll
index 5addaca47c86ee..83d3a33572266f 100644
--- a/llvm/test/CodeGen/X86/pr33349.ll
+++ b/llvm/test/CodeGen/X86/pr33349.ll
@@ -11,32 +11,28 @@ target triple = "x86_64-unknown-linux-gnu"
 ; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
 ; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
 ; KNL-NEXT:    kshiftrw $2, %k0, %k1
-; KNL-NEXT:    kshiftrw $1, %k1, %k2
 ; KNL-NEXT:    kmovw %k1, %eax
 ; KNL-NEXT:    testb $1, %al
 ; KNL-NEXT:    fld1
 ; KNL-NEXT:    fldz
 ; KNL-NEXT:    fld %st(0)
 ; KNL-NEXT:    fcmovne %st(2), %st
-; KNL-NEXT:    kmovw %k2, %eax
-; KNL-NEXT:    testb $1, %al
+; KNL-NEXT:    testb $2, %al
 ; KNL-NEXT:    fld %st(1)
 ; KNL-NEXT:    fcmovne %st(3), %st
-; KNL-NEXT:    kshiftrw $1, %k0, %k1
-; KNL-NEXT:    kmovw %k1, %eax
+; KNL-NEXT:    kmovw %k0, %eax
 ; KNL-NEXT:    testb $1, %al
 ; KNL-NEXT:    fld %st(2)
 ; KNL-NEXT:    fcmovne %st(4), %st
-; KNL-NEXT:    kmovw %k0, %eax
-; KNL-NEXT:    testb $1, %al
+; KNL-NEXT:    testb $2, %al
 ; KNL-NEXT:    fxch %st(3)
 ; KNL-NEXT:    fcmovne %st(4), %st
 ; KNL-NEXT:    fstp %st(4)
 ; KNL-NEXT:    fxch %st(3)
-; KNL-NEXT:    fstpt (%rdi)
-; KNL-NEXT:    fxch %st(1)
 ; KNL-NEXT:    fstpt 10(%rdi)
 ; KNL-NEXT:    fxch %st(1)
+; KNL-NEXT:    fstpt (%rdi)
+; KNL-NEXT:    fxch %st(1)
 ; KNL-NEXT:    fstpt 30(%rdi)
 ; KNL-NEXT:    fstpt 20(%rdi)
 ; KNL-NEXT:    vzeroupper
@@ -47,32 +43,28 @@ target triple = "x86_64-unknown-linux-gnu"
 ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
 ; SKX-NEXT:    vpmovd2m %xmm0, %k0
 ; SKX-NEXT:    kshiftrb $2, %k0, %k1
-; SKX-NEXT:    kshiftrb $1, %k1, %k2
 ; SKX-NEXT:    kmovd %k1, %eax
 ; SKX-NEXT:    testb $1, %al
 ; SKX-NEXT:    fld1
 ; SKX-NEXT:    fldz
 ; SKX-NEXT:    fld %st(0)
 ; SKX-NEXT:    fcmovne %st(2), %st
-; SKX-NEXT:    kmovd %k2, %eax
-; SKX-NEXT:    testb $1, %al
+; SKX-NEXT:    testb $2, %al
 ; SKX-NEXT:    fld %st(1)
 ; SKX-NEXT:    fcmovne %st(3), %st
-; SKX-NEXT:    kshiftrb $1, %k0, %k1
-; SKX-NEXT:    kmovd %k1, %eax
+; SKX-NEXT:    kmovd %k0, %eax
 ; SKX-NEXT:    testb $1, %al
 ; SKX-NEXT:    fld %st(2)
 ; SKX-NEXT:    fcmovne %st(4), %st
-; SKX-NEXT:    kmovd %k0, %eax
-; SKX-NEXT:    testb $1, %al
+; SKX-NEXT:    testb $2, %al
 ; SKX-NEXT:    fxch %st(3)
 ; SKX-NEXT:    fcmovne %st(4), %st
 ; SKX-NEXT:    fstp %st(4)
 ; SKX-NEXT:    fxch %st(3)
-; SKX-NEXT:    fstpt (%rdi)
-; SKX-NEXT:    fxch %st(1)
 ; SKX-NEXT:    fstpt 10(%rdi)
 ; SKX-NEXT:    fxch %st(1)
+; SKX-NEXT:    fstpt (%rdi)
+; SKX-NEXT:    fxch %st(1)
 ; SKX-NEXT:    fstpt 30(%rdi)
 ; SKX-NEXT:    fstpt 20(%rdi)
 ; SKX-NEXT:    retq

diff  --git a/llvm/test/CodeGen/X86/pr34177.ll b/llvm/test/CodeGen/X86/pr34177.ll
index 2653fcdcb75cad..29922c2ac1a716 100644
--- a/llvm/test/CodeGen/X86/pr34177.ll
+++ b/llvm/test/CodeGen/X86/pr34177.ll
@@ -49,24 +49,20 @@ define void @test(<4 x i64> %a, <4 x x86_fp80> %b, ptr %c) local_unnamed_addr {
 ; AVX512VL-LABEL: test:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
-; AVX512VL-NEXT:    kshiftrb $1, %k0, %k1
-; AVX512VL-NEXT:    kshiftrb $2, %k0, %k2
+; AVX512VL-NEXT:    kshiftrb $2, %k0, %k1
 ; AVX512VL-NEXT:    kmovd %k0, %eax
-; AVX512VL-NEXT:    testb $1, %al
+; AVX512VL-NEXT:    testb $2, %al
 ; AVX512VL-NEXT:    fld1
 ; AVX512VL-NEXT:    fldz
 ; AVX512VL-NEXT:    fld %st(0)
 ; AVX512VL-NEXT:    fcmovne %st(2), %st
-; AVX512VL-NEXT:    kmovd %k1, %eax
 ; AVX512VL-NEXT:    testb $1, %al
 ; AVX512VL-NEXT:    fld %st(1)
 ; AVX512VL-NEXT:    fcmovne %st(3), %st
-; AVX512VL-NEXT:    kshiftrb $1, %k2, %k0
-; AVX512VL-NEXT:    kmovd %k0, %eax
-; AVX512VL-NEXT:    testb $1, %al
+; AVX512VL-NEXT:    kmovd %k1, %eax
+; AVX512VL-NEXT:    testb $2, %al
 ; AVX512VL-NEXT:    fld %st(2)
 ; AVX512VL-NEXT:    fcmovne %st(4), %st
-; AVX512VL-NEXT:    kmovd %k2, %eax
 ; AVX512VL-NEXT:    testb $1, %al
 ; AVX512VL-NEXT:    fxch %st(3)
 ; AVX512VL-NEXT:    fcmovne %st(4), %st
@@ -81,10 +77,10 @@ define void @test(<4 x i64> %a, <4 x x86_fp80> %b, ptr %c) local_unnamed_addr {
 ; AVX512VL-NEXT:    fstpt 10(%rdi)
 ; AVX512VL-NEXT:    fxch %st(1)
 ; AVX512VL-NEXT:    fadd %st, %st(0)
-; AVX512VL-NEXT:    fstpt (%rdi)
-; AVX512VL-NEXT:    fadd %st, %st(0)
 ; AVX512VL-NEXT:    fstpt 20(%rdi)
 ; AVX512VL-NEXT:    fadd %st, %st(0)
+; AVX512VL-NEXT:    fstpt (%rdi)
+; AVX512VL-NEXT:    fadd %st, %st(0)
 ; AVX512VL-NEXT:    fstpt 60(%rdi)
 ; AVX512VL-NEXT:    fadd %st, %st(0)
 ; AVX512VL-NEXT:    fstpt 40(%rdi)


        


More information about the llvm-commits mailing list