[llvm] 460bba3 - Revert "[X86] combineCMP - attempt to simplify KSHIFTR mask element extractions when just comparing against zero"

Philip Reames via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 1 12:18:07 PDT 2023


Neither this change nor your previous revert build on top of tree.  I 
have reverted both.

Philip

On 9/1/23 12:06, Zequan Wu via llvm-commits wrote:
> Author: Zequan Wu
> Date: 2023-09-01T15:06:04-04:00
> New Revision: 460bba35211853b6278ddd6064f7228db02da132
>
> URL: https://github.com/llvm/llvm-project/commit/460bba35211853b6278ddd6064f7228db02da132
> DIFF: https://github.com/llvm/llvm-project/commit/460bba35211853b6278ddd6064f7228db02da132.diff
>
> LOG: Revert "[X86] combineCMP - attempt to simplify KSHIFTR mask element extractions when just comparing against zero"
>
> This reverts commit 239ab16ec1213749a2228368298519b377d336bb.
>
> This causes crashes when compiling chromium with asan, attached reduced ir at: https://reviews.llvm.org/rG239ab16ec1213749a2228368298519b377d336bb#1245595
>
> Added:
>      
>
> Modified:
>      llvm/lib/Target/X86/X86ISelLowering.cpp
>      llvm/test/CodeGen/X86/avx512-insert-extract.ll
>      llvm/test/CodeGen/X86/movmsk-cmp.ll
>      llvm/test/CodeGen/X86/pr33349.ll
>      llvm/test/CodeGen/X86/pr34177.ll
>
> Removed:
>      
>
>
> ################################################################################
> diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
> index ee8d268379fe95..8a1ce30e00a766 100644
> --- a/llvm/lib/Target/X86/X86ISelLowering.cpp
> +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
> @@ -53470,7 +53470,6 @@ static SDValue combineCMP(SDNode *N, SelectionDAG &DAG,
>     SDLoc dl(N);
>     SDValue Op = N->getOperand(0);
>     EVT VT = Op.getValueType();
> -  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
>   
>     // If we have a constant logical shift that's only used in a comparison
>     // against zero turn it into an equivalent AND. This allows turning it into
> @@ -53494,41 +53493,12 @@ static SDValue combineCMP(SDNode *N, SelectionDAG &DAG,
>       }
>     }
>   
> -  // If we're extracting from a avx512 bool vector and comparing against zero,
> -  // then try to just bitcast the vector to an integer to use TEST/BT directly.
> -  // (and (extract_elt (kshiftr vXi1, C), 0), 1) -> (and (bc vXi1), 1<<C)
> -  if (Op.getOpcode() == ISD::AND && isOneConstant(Op.getOperand(1)) &&
> -      Op.hasOneUse() && onlyZeroFlagUsed(SDValue(N, 0))) {
> -    SDValue Src = Op.getOperand(0);
> -    if (Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
> -        isNullConstant(Src.getOperand(1)) &&
> -        Src.getOperand(0).getValueType().getScalarType() == MVT::i1) {
> -      SDValue BoolVec = Src.getOperand(0);
> -      unsigned ShAmt = 0;
> -      if (BoolVec.getOpcode() == X86ISD::KSHIFTR) {
> -        ShAmt = BoolVec.getConstantOperandVal(1);
> -        BoolVec = BoolVec.getOperand(0);
> -      }
> -      BoolVec = widenMaskVector(BoolVec, false, Subtarget, DAG, dl);
> -      EVT VecVT = BoolVec.getValueType();
> -      unsigned BitWidth = VecVT.getVectorNumElements();
> -      EVT BCVT = EVT::getIntegerVT(*DAG.getContext(), BitWidth);
> -      if (TLI.isTypeLegal(VecVT) && TLI.isTypeLegal(BCVT)) {
> -        APInt Mask = APInt::getOneBitSet(BitWidth, ShAmt);
> -        Op = DAG.getBitcast(BCVT, BoolVec);
> -        Op = DAG.getNode(ISD::AND, dl, BCVT, Op,
> -                         DAG.getConstant(Mask, dl, BCVT));
> -        return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
> -                           DAG.getConstant(0, dl, VT));
> -      }
> -    }
> -  }
> -
>     // Peek through any zero-extend if we're only testing for a zero result.
>     if (Op.getOpcode() == ISD::ZERO_EXTEND && onlyZeroFlagUsed(SDValue(N, 0))) {
>       SDValue Src = Op.getOperand(0);
>       EVT SrcVT = Src.getValueType();
> -    if (SrcVT.getScalarSizeInBits() >= 8 && TLI.isTypeLegal(SrcVT))
> +    if (SrcVT.getScalarSizeInBits() >= 8 &&
> +        DAG.getTargetLoweringInfo().isTypeLegal(SrcVT))
>         return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Src,
>                            DAG.getConstant(0, dl, SrcVT));
>     }
>
> diff  --git a/llvm/test/CodeGen/X86/avx512-insert-extract.ll b/llvm/test/CodeGen/X86/avx512-insert-extract.ll
> index af5e3e6177dc3a..89b245b4ca8ef6 100644
> --- a/llvm/test/CodeGen/X86/avx512-insert-extract.ll
> +++ b/llvm/test/CodeGen/X86/avx512-insert-extract.ll
> @@ -175,8 +175,9 @@ define <16 x i32> @test11(<16 x i32>%a, <16 x i32>%b) nounwind {
>   ; KNL-LABEL: test11:
>   ; KNL:       ## %bb.0:
>   ; KNL-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
> +; KNL-NEXT:    kshiftrw $4, %k0, %k0
>   ; KNL-NEXT:    kmovw %k0, %eax
> -; KNL-NEXT:    testb $16, %al
> +; KNL-NEXT:    testb $1, %al
>   ; KNL-NEXT:    je LBB10_2
>   ; KNL-NEXT:  ## %bb.1: ## %A
>   ; KNL-NEXT:    vmovdqa64 %zmm1, %zmm0
> @@ -188,8 +189,9 @@ define <16 x i32> @test11(<16 x i32>%a, <16 x i32>%b) nounwind {
>   ; SKX-LABEL: test11:
>   ; SKX:       ## %bb.0:
>   ; SKX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
> +; SKX-NEXT:    kshiftrw $4, %k0, %k0
>   ; SKX-NEXT:    kmovd %k0, %eax
> -; SKX-NEXT:    testb $16, %al
> +; SKX-NEXT:    testb $1, %al
>   ; SKX-NEXT:    je LBB10_2
>   ; SKX-NEXT:  ## %bb.1: ## %A
>   ; SKX-NEXT:    vmovdqa64 %zmm1, %zmm0
> @@ -274,8 +276,9 @@ define i64 @test14(<8 x i64>%a, <8 x i64>%b, i64 %a1, i64 %b1) nounwind {
>   ; KNL:       ## %bb.0:
>   ; KNL-NEXT:    movq %rdi, %rax
>   ; KNL-NEXT:    vpcmpgtq %zmm0, %zmm1, %k0
> +; KNL-NEXT:    kshiftrw $4, %k0, %k0
>   ; KNL-NEXT:    kmovw %k0, %ecx
> -; KNL-NEXT:    testb $16, %cl
> +; KNL-NEXT:    testb $1, %cl
>   ; KNL-NEXT:    cmoveq %rsi, %rax
>   ; KNL-NEXT:    vzeroupper
>   ; KNL-NEXT:    retq
> @@ -284,8 +287,9 @@ define i64 @test14(<8 x i64>%a, <8 x i64>%b, i64 %a1, i64 %b1) nounwind {
>   ; SKX:       ## %bb.0:
>   ; SKX-NEXT:    movq %rdi, %rax
>   ; SKX-NEXT:    vpcmpgtq %zmm0, %zmm1, %k0
> +; SKX-NEXT:    kshiftrb $4, %k0, %k0
>   ; SKX-NEXT:    kmovd %k0, %ecx
> -; SKX-NEXT:    testb $16, %cl
> +; SKX-NEXT:    testb $1, %cl
>   ; SKX-NEXT:    cmoveq %rsi, %rax
>   ; SKX-NEXT:    vzeroupper
>   ; SKX-NEXT:    retq
>
> diff  --git a/llvm/test/CodeGen/X86/movmsk-cmp.ll b/llvm/test/CodeGen/X86/movmsk-cmp.ll
> index a0901e265f5ae9..f90fa4482a9d90 100644
> --- a/llvm/test/CodeGen/X86/movmsk-cmp.ll
> +++ b/llvm/test/CodeGen/X86/movmsk-cmp.ll
> @@ -4350,8 +4350,14 @@ define i32 @PR39665_c_ray(<2 x double> %x, <2 x double> %y) {
>   ; KNL-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
>   ; KNL-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
>   ; KNL-NEXT:    vcmpltpd %zmm0, %zmm1, %k0
> +; KNL-NEXT:    kshiftrw $1, %k0, %k1
> +; KNL-NEXT:    kmovw %k1, %eax
>   ; KNL-NEXT:    kmovw %k0, %ecx
> +<<<<<<< HEAD
>   ; KNL-NEXT:    testb $2, %cl
> +=======
> +; KNL-NEXT:    testb $1, %al
> +>>>>>>> parent of 239ab16ec121 ([X86] combineCMP - attempt to simplify KSHIFTR mask element extractions when just comparing against zero)
>   ; KNL-NEXT:    movl $42, %eax
>   ; KNL-NEXT:    movl $99, %edx
>   ; KNL-NEXT:    cmovel %edx, %eax
> @@ -4363,8 +4369,14 @@ define i32 @PR39665_c_ray(<2 x double> %x, <2 x double> %y) {
>   ; SKX-LABEL: PR39665_c_ray:
>   ; SKX:       # %bb.0:
>   ; SKX-NEXT:    vcmpltpd %xmm0, %xmm1, %k0
> +; SKX-NEXT:    kshiftrb $1, %k0, %k1
> +; SKX-NEXT:    kmovd %k1, %eax
>   ; SKX-NEXT:    kmovd %k0, %ecx
> +<<<<<<< HEAD
>   ; SKX-NEXT:    testb $2, %cl
> +=======
> +; SKX-NEXT:    testb $1, %al
> +>>>>>>> parent of 239ab16ec121 ([X86] combineCMP - attempt to simplify KSHIFTR mask element extractions when just comparing against zero)
>   ; SKX-NEXT:    movl $42, %eax
>   ; SKX-NEXT:    movl $99, %edx
>   ; SKX-NEXT:    cmovel %edx, %eax
>
> diff  --git a/llvm/test/CodeGen/X86/pr33349.ll b/llvm/test/CodeGen/X86/pr33349.ll
> index 83d3a33572266f..d7e76ff9f19d61 100644
> --- a/llvm/test/CodeGen/X86/pr33349.ll
> +++ b/llvm/test/CodeGen/X86/pr33349.ll
> @@ -11,28 +11,36 @@ target triple = "x86_64-unknown-linux-gnu"
>   ; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
>   ; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
>   ; KNL-NEXT:    kshiftrw $2, %k0, %k1
> +; KNL-NEXT:    kshiftrw $1, %k1, %k2
>   ; KNL-NEXT:    kmovw %k1, %eax
>   ; KNL-NEXT:    testb $1, %al
>   ; KNL-NEXT:    fld1
>   ; KNL-NEXT:    fldz
>   ; KNL-NEXT:    fld %st(0)
>   ; KNL-NEXT:    fcmovne %st(2), %st
> -; KNL-NEXT:    testb $2, %al
> +; KNL-NEXT:    kmovw %k2, %eax
> +; KNL-NEXT:    testb $1, %al
>   ; KNL-NEXT:    fld %st(1)
>   ; KNL-NEXT:    fcmovne %st(3), %st
> +<<<<<<< HEAD
>   ; KNL-NEXT:    kmovw %k0, %eax
> +=======
> +; KNL-NEXT:    kshiftrw $1, %k0, %k1
> +; KNL-NEXT:    kmovw %k1, %eax
> +>>>>>>> parent of 239ab16ec121 ([X86] combineCMP - attempt to simplify KSHIFTR mask element extractions when just comparing against zero)
>   ; KNL-NEXT:    testb $1, %al
>   ; KNL-NEXT:    fld %st(2)
>   ; KNL-NEXT:    fcmovne %st(4), %st
> -; KNL-NEXT:    testb $2, %al
> +; KNL-NEXT:    kmovw %k0, %eax
> +; KNL-NEXT:    testb $1, %al
>   ; KNL-NEXT:    fxch %st(3)
>   ; KNL-NEXT:    fcmovne %st(4), %st
>   ; KNL-NEXT:    fstp %st(4)
>   ; KNL-NEXT:    fxch %st(3)
> -; KNL-NEXT:    fstpt 10(%rdi)
> -; KNL-NEXT:    fxch %st(1)
>   ; KNL-NEXT:    fstpt (%rdi)
>   ; KNL-NEXT:    fxch %st(1)
> +; KNL-NEXT:    fstpt 10(%rdi)
> +; KNL-NEXT:    fxch %st(1)
>   ; KNL-NEXT:    fstpt 30(%rdi)
>   ; KNL-NEXT:    fstpt 20(%rdi)
>   ; KNL-NEXT:    vzeroupper
> @@ -43,28 +51,36 @@ target triple = "x86_64-unknown-linux-gnu"
>   ; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
>   ; SKX-NEXT:    vpmovd2m %xmm0, %k0
>   ; SKX-NEXT:    kshiftrb $2, %k0, %k1
> +; SKX-NEXT:    kshiftrb $1, %k1, %k2
>   ; SKX-NEXT:    kmovd %k1, %eax
>   ; SKX-NEXT:    testb $1, %al
>   ; SKX-NEXT:    fld1
>   ; SKX-NEXT:    fldz
>   ; SKX-NEXT:    fld %st(0)
>   ; SKX-NEXT:    fcmovne %st(2), %st
> -; SKX-NEXT:    testb $2, %al
> +; SKX-NEXT:    kmovd %k2, %eax
> +; SKX-NEXT:    testb $1, %al
>   ; SKX-NEXT:    fld %st(1)
>   ; SKX-NEXT:    fcmovne %st(3), %st
> +<<<<<<< HEAD
>   ; SKX-NEXT:    kmovd %k0, %eax
> +=======
> +; SKX-NEXT:    kshiftrb $1, %k0, %k1
> +; SKX-NEXT:    kmovd %k1, %eax
> +>>>>>>> parent of 239ab16ec121 ([X86] combineCMP - attempt to simplify KSHIFTR mask element extractions when just comparing against zero)
>   ; SKX-NEXT:    testb $1, %al
>   ; SKX-NEXT:    fld %st(2)
>   ; SKX-NEXT:    fcmovne %st(4), %st
> -; SKX-NEXT:    testb $2, %al
> +; SKX-NEXT:    kmovd %k0, %eax
> +; SKX-NEXT:    testb $1, %al
>   ; SKX-NEXT:    fxch %st(3)
>   ; SKX-NEXT:    fcmovne %st(4), %st
>   ; SKX-NEXT:    fstp %st(4)
>   ; SKX-NEXT:    fxch %st(3)
> -; SKX-NEXT:    fstpt 10(%rdi)
> -; SKX-NEXT:    fxch %st(1)
>   ; SKX-NEXT:    fstpt (%rdi)
>   ; SKX-NEXT:    fxch %st(1)
> +; SKX-NEXT:    fstpt 10(%rdi)
> +; SKX-NEXT:    fxch %st(1)
>   ; SKX-NEXT:    fstpt 30(%rdi)
>   ; SKX-NEXT:    fstpt 20(%rdi)
>   ; SKX-NEXT:    retq
>
> diff  --git a/llvm/test/CodeGen/X86/pr34177.ll b/llvm/test/CodeGen/X86/pr34177.ll
> index 29922c2ac1a716..6dc4ee7a831809 100644
> --- a/llvm/test/CodeGen/X86/pr34177.ll
> +++ b/llvm/test/CodeGen/X86/pr34177.ll
> @@ -49,20 +49,35 @@ define void @test(<4 x i64> %a, <4 x x86_fp80> %b, ptr %c) local_unnamed_addr {
>   ; AVX512VL-LABEL: test:
>   ; AVX512VL:       # %bb.0:
>   ; AVX512VL-NEXT:    vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
> +<<<<<<< HEAD
>   ; AVX512VL-NEXT:    kshiftrb $2, %k0, %k1
>   ; AVX512VL-NEXT:    kmovd %k0, %eax
>   ; AVX512VL-NEXT:    testb $2, %al
> +=======
> +; AVX512VL-NEXT:    kshiftrb $1, %k0, %k1
> +; AVX512VL-NEXT:    kshiftrb $2, %k0, %k2
> +; AVX512VL-NEXT:    kmovd %k0, %eax
> +; AVX512VL-NEXT:    testb $1, %al
> +>>>>>>> parent of 239ab16ec121 ([X86] combineCMP - attempt to simplify KSHIFTR mask element extractions when just comparing against zero)
>   ; AVX512VL-NEXT:    fld1
>   ; AVX512VL-NEXT:    fldz
>   ; AVX512VL-NEXT:    fld %st(0)
>   ; AVX512VL-NEXT:    fcmovne %st(2), %st
> +; AVX512VL-NEXT:    kmovd %k1, %eax
>   ; AVX512VL-NEXT:    testb $1, %al
>   ; AVX512VL-NEXT:    fld %st(1)
>   ; AVX512VL-NEXT:    fcmovne %st(3), %st
> +<<<<<<< HEAD
>   ; AVX512VL-NEXT:    kmovd %k1, %eax
>   ; AVX512VL-NEXT:    testb $2, %al
> +=======
> +; AVX512VL-NEXT:    kshiftrb $1, %k2, %k0
> +; AVX512VL-NEXT:    kmovd %k0, %eax
> +; AVX512VL-NEXT:    testb $1, %al
> +>>>>>>> parent of 239ab16ec121 ([X86] combineCMP - attempt to simplify KSHIFTR mask element extractions when just comparing against zero)
>   ; AVX512VL-NEXT:    fld %st(2)
>   ; AVX512VL-NEXT:    fcmovne %st(4), %st
> +; AVX512VL-NEXT:    kmovd %k2, %eax
>   ; AVX512VL-NEXT:    testb $1, %al
>   ; AVX512VL-NEXT:    fxch %st(3)
>   ; AVX512VL-NEXT:    fcmovne %st(4), %st
> @@ -77,10 +92,10 @@ define void @test(<4 x i64> %a, <4 x x86_fp80> %b, ptr %c) local_unnamed_addr {
>   ; AVX512VL-NEXT:    fstpt 10(%rdi)
>   ; AVX512VL-NEXT:    fxch %st(1)
>   ; AVX512VL-NEXT:    fadd %st, %st(0)
> -; AVX512VL-NEXT:    fstpt 20(%rdi)
> -; AVX512VL-NEXT:    fadd %st, %st(0)
>   ; AVX512VL-NEXT:    fstpt (%rdi)
>   ; AVX512VL-NEXT:    fadd %st, %st(0)
> +; AVX512VL-NEXT:    fstpt 20(%rdi)
> +; AVX512VL-NEXT:    fadd %st, %st(0)
>   ; AVX512VL-NEXT:    fstpt 60(%rdi)
>   ; AVX512VL-NEXT:    fadd %st, %st(0)
>   ; AVX512VL-NEXT:    fstpt 40(%rdi)
>
>
>          
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits


More information about the llvm-commits mailing list