[llvm] 460bba3 - Revert "[X86] combineCMP - attempt to simplify KSHIFTR mask element extractions when just comparing against zero"
Zequan Wu via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 1 12:06:13 PDT 2023
Author: Zequan Wu
Date: 2023-09-01T15:06:04-04:00
New Revision: 460bba35211853b6278ddd6064f7228db02da132
URL: https://github.com/llvm/llvm-project/commit/460bba35211853b6278ddd6064f7228db02da132
DIFF: https://github.com/llvm/llvm-project/commit/460bba35211853b6278ddd6064f7228db02da132.diff
LOG: Revert "[X86] combineCMP - attempt to simplify KSHIFTR mask element extractions when just comparing against zero"
This reverts commit 239ab16ec1213749a2228368298519b377d336bb.
This causes crashes when compiling chromium with asan, attached reduced ir at: https://reviews.llvm.org/rG239ab16ec1213749a2228368298519b377d336bb#1245595
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/avx512-insert-extract.ll
llvm/test/CodeGen/X86/movmsk-cmp.ll
llvm/test/CodeGen/X86/pr33349.ll
llvm/test/CodeGen/X86/pr34177.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index ee8d268379fe95..8a1ce30e00a766 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -53470,7 +53470,6 @@ static SDValue combineCMP(SDNode *N, SelectionDAG &DAG,
SDLoc dl(N);
SDValue Op = N->getOperand(0);
EVT VT = Op.getValueType();
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// If we have a constant logical shift that's only used in a comparison
// against zero turn it into an equivalent AND. This allows turning it into
@@ -53494,41 +53493,12 @@ static SDValue combineCMP(SDNode *N, SelectionDAG &DAG,
}
}
- // If we're extracting from a avx512 bool vector and comparing against zero,
- // then try to just bitcast the vector to an integer to use TEST/BT directly.
- // (and (extract_elt (kshiftr vXi1, C), 0), 1) -> (and (bc vXi1), 1<<C)
- if (Op.getOpcode() == ISD::AND && isOneConstant(Op.getOperand(1)) &&
- Op.hasOneUse() && onlyZeroFlagUsed(SDValue(N, 0))) {
- SDValue Src = Op.getOperand(0);
- if (Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
- isNullConstant(Src.getOperand(1)) &&
- Src.getOperand(0).getValueType().getScalarType() == MVT::i1) {
- SDValue BoolVec = Src.getOperand(0);
- unsigned ShAmt = 0;
- if (BoolVec.getOpcode() == X86ISD::KSHIFTR) {
- ShAmt = BoolVec.getConstantOperandVal(1);
- BoolVec = BoolVec.getOperand(0);
- }
- BoolVec = widenMaskVector(BoolVec, false, Subtarget, DAG, dl);
- EVT VecVT = BoolVec.getValueType();
- unsigned BitWidth = VecVT.getVectorNumElements();
- EVT BCVT = EVT::getIntegerVT(*DAG.getContext(), BitWidth);
- if (TLI.isTypeLegal(VecVT) && TLI.isTypeLegal(BCVT)) {
- APInt Mask = APInt::getOneBitSet(BitWidth, ShAmt);
- Op = DAG.getBitcast(BCVT, BoolVec);
- Op = DAG.getNode(ISD::AND, dl, BCVT, Op,
- DAG.getConstant(Mask, dl, BCVT));
- return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
- DAG.getConstant(0, dl, VT));
- }
- }
- }
-
// Peek through any zero-extend if we're only testing for a zero result.
if (Op.getOpcode() == ISD::ZERO_EXTEND && onlyZeroFlagUsed(SDValue(N, 0))) {
SDValue Src = Op.getOperand(0);
EVT SrcVT = Src.getValueType();
- if (SrcVT.getScalarSizeInBits() >= 8 && TLI.isTypeLegal(SrcVT))
+ if (SrcVT.getScalarSizeInBits() >= 8 &&
+ DAG.getTargetLoweringInfo().isTypeLegal(SrcVT))
return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Src,
DAG.getConstant(0, dl, SrcVT));
}
diff --git a/llvm/test/CodeGen/X86/avx512-insert-extract.ll b/llvm/test/CodeGen/X86/avx512-insert-extract.ll
index af5e3e6177dc3a..89b245b4ca8ef6 100644
--- a/llvm/test/CodeGen/X86/avx512-insert-extract.ll
+++ b/llvm/test/CodeGen/X86/avx512-insert-extract.ll
@@ -175,8 +175,9 @@ define <16 x i32> @test11(<16 x i32>%a, <16 x i32>%b) nounwind {
; KNL-LABEL: test11:
; KNL: ## %bb.0:
; KNL-NEXT: vpcmpltud %zmm1, %zmm0, %k0
+; KNL-NEXT: kshiftrw $4, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: testb $16, %al
+; KNL-NEXT: testb $1, %al
; KNL-NEXT: je LBB10_2
; KNL-NEXT: ## %bb.1: ## %A
; KNL-NEXT: vmovdqa64 %zmm1, %zmm0
@@ -188,8 +189,9 @@ define <16 x i32> @test11(<16 x i32>%a, <16 x i32>%b) nounwind {
; SKX-LABEL: test11:
; SKX: ## %bb.0:
; SKX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
+; SKX-NEXT: kshiftrw $4, %k0, %k0
; SKX-NEXT: kmovd %k0, %eax
-; SKX-NEXT: testb $16, %al
+; SKX-NEXT: testb $1, %al
; SKX-NEXT: je LBB10_2
; SKX-NEXT: ## %bb.1: ## %A
; SKX-NEXT: vmovdqa64 %zmm1, %zmm0
@@ -274,8 +276,9 @@ define i64 @test14(<8 x i64>%a, <8 x i64>%b, i64 %a1, i64 %b1) nounwind {
; KNL: ## %bb.0:
; KNL-NEXT: movq %rdi, %rax
; KNL-NEXT: vpcmpgtq %zmm0, %zmm1, %k0
+; KNL-NEXT: kshiftrw $4, %k0, %k0
; KNL-NEXT: kmovw %k0, %ecx
-; KNL-NEXT: testb $16, %cl
+; KNL-NEXT: testb $1, %cl
; KNL-NEXT: cmoveq %rsi, %rax
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
@@ -284,8 +287,9 @@ define i64 @test14(<8 x i64>%a, <8 x i64>%b, i64 %a1, i64 %b1) nounwind {
; SKX: ## %bb.0:
; SKX-NEXT: movq %rdi, %rax
; SKX-NEXT: vpcmpgtq %zmm0, %zmm1, %k0
+; SKX-NEXT: kshiftrb $4, %k0, %k0
; SKX-NEXT: kmovd %k0, %ecx
-; SKX-NEXT: testb $16, %cl
+; SKX-NEXT: testb $1, %cl
; SKX-NEXT: cmoveq %rsi, %rax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/movmsk-cmp.ll b/llvm/test/CodeGen/X86/movmsk-cmp.ll
index a0901e265f5ae9..f90fa4482a9d90 100644
--- a/llvm/test/CodeGen/X86/movmsk-cmp.ll
+++ b/llvm/test/CodeGen/X86/movmsk-cmp.ll
@@ -4350,8 +4350,14 @@ define i32 @PR39665_c_ray(<2 x double> %x, <2 x double> %y) {
; KNL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; KNL-NEXT: vcmpltpd %zmm0, %zmm1, %k0
+; KNL-NEXT: kshiftrw $1, %k0, %k1
+; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kmovw %k0, %ecx
+<<<<<<< HEAD
; KNL-NEXT: testb $2, %cl
+=======
+; KNL-NEXT: testb $1, %al
+>>>>>>> parent of 239ab16ec121 ([X86] combineCMP - attempt to simplify KSHIFTR mask element extractions when just comparing against zero)
; KNL-NEXT: movl $42, %eax
; KNL-NEXT: movl $99, %edx
; KNL-NEXT: cmovel %edx, %eax
@@ -4363,8 +4369,14 @@ define i32 @PR39665_c_ray(<2 x double> %x, <2 x double> %y) {
; SKX-LABEL: PR39665_c_ray:
; SKX: # %bb.0:
; SKX-NEXT: vcmpltpd %xmm0, %xmm1, %k0
+; SKX-NEXT: kshiftrb $1, %k0, %k1
+; SKX-NEXT: kmovd %k1, %eax
; SKX-NEXT: kmovd %k0, %ecx
+<<<<<<< HEAD
; SKX-NEXT: testb $2, %cl
+=======
+; SKX-NEXT: testb $1, %al
+>>>>>>> parent of 239ab16ec121 ([X86] combineCMP - attempt to simplify KSHIFTR mask element extractions when just comparing against zero)
; SKX-NEXT: movl $42, %eax
; SKX-NEXT: movl $99, %edx
; SKX-NEXT: cmovel %edx, %eax
diff --git a/llvm/test/CodeGen/X86/pr33349.ll b/llvm/test/CodeGen/X86/pr33349.ll
index 83d3a33572266f..d7e76ff9f19d61 100644
--- a/llvm/test/CodeGen/X86/pr33349.ll
+++ b/llvm/test/CodeGen/X86/pr33349.ll
@@ -11,28 +11,36 @@ target triple = "x86_64-unknown-linux-gnu"
; KNL-NEXT: vpslld $31, %xmm0, %xmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kshiftrw $2, %k0, %k1
+; KNL-NEXT: kshiftrw $1, %k1, %k2
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: testb $1, %al
; KNL-NEXT: fld1
; KNL-NEXT: fldz
; KNL-NEXT: fld %st(0)
; KNL-NEXT: fcmovne %st(2), %st
-; KNL-NEXT: testb $2, %al
+; KNL-NEXT: kmovw %k2, %eax
+; KNL-NEXT: testb $1, %al
; KNL-NEXT: fld %st(1)
; KNL-NEXT: fcmovne %st(3), %st
+<<<<<<< HEAD
; KNL-NEXT: kmovw %k0, %eax
+=======
+; KNL-NEXT: kshiftrw $1, %k0, %k1
+; KNL-NEXT: kmovw %k1, %eax
+>>>>>>> parent of 239ab16ec121 ([X86] combineCMP - attempt to simplify KSHIFTR mask element extractions when just comparing against zero)
; KNL-NEXT: testb $1, %al
; KNL-NEXT: fld %st(2)
; KNL-NEXT: fcmovne %st(4), %st
-; KNL-NEXT: testb $2, %al
+; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: testb $1, %al
; KNL-NEXT: fxch %st(3)
; KNL-NEXT: fcmovne %st(4), %st
; KNL-NEXT: fstp %st(4)
; KNL-NEXT: fxch %st(3)
-; KNL-NEXT: fstpt 10(%rdi)
-; KNL-NEXT: fxch %st(1)
; KNL-NEXT: fstpt (%rdi)
; KNL-NEXT: fxch %st(1)
+; KNL-NEXT: fstpt 10(%rdi)
+; KNL-NEXT: fxch %st(1)
; KNL-NEXT: fstpt 30(%rdi)
; KNL-NEXT: fstpt 20(%rdi)
; KNL-NEXT: vzeroupper
@@ -43,28 +51,36 @@ target triple = "x86_64-unknown-linux-gnu"
; SKX-NEXT: vpslld $31, %xmm0, %xmm0
; SKX-NEXT: vpmovd2m %xmm0, %k0
; SKX-NEXT: kshiftrb $2, %k0, %k1
+; SKX-NEXT: kshiftrb $1, %k1, %k2
; SKX-NEXT: kmovd %k1, %eax
; SKX-NEXT: testb $1, %al
; SKX-NEXT: fld1
; SKX-NEXT: fldz
; SKX-NEXT: fld %st(0)
; SKX-NEXT: fcmovne %st(2), %st
-; SKX-NEXT: testb $2, %al
+; SKX-NEXT: kmovd %k2, %eax
+; SKX-NEXT: testb $1, %al
; SKX-NEXT: fld %st(1)
; SKX-NEXT: fcmovne %st(3), %st
+<<<<<<< HEAD
; SKX-NEXT: kmovd %k0, %eax
+=======
+; SKX-NEXT: kshiftrb $1, %k0, %k1
+; SKX-NEXT: kmovd %k1, %eax
+>>>>>>> parent of 239ab16ec121 ([X86] combineCMP - attempt to simplify KSHIFTR mask element extractions when just comparing against zero)
; SKX-NEXT: testb $1, %al
; SKX-NEXT: fld %st(2)
; SKX-NEXT: fcmovne %st(4), %st
-; SKX-NEXT: testb $2, %al
+; SKX-NEXT: kmovd %k0, %eax
+; SKX-NEXT: testb $1, %al
; SKX-NEXT: fxch %st(3)
; SKX-NEXT: fcmovne %st(4), %st
; SKX-NEXT: fstp %st(4)
; SKX-NEXT: fxch %st(3)
-; SKX-NEXT: fstpt 10(%rdi)
-; SKX-NEXT: fxch %st(1)
; SKX-NEXT: fstpt (%rdi)
; SKX-NEXT: fxch %st(1)
+; SKX-NEXT: fstpt 10(%rdi)
+; SKX-NEXT: fxch %st(1)
; SKX-NEXT: fstpt 30(%rdi)
; SKX-NEXT: fstpt 20(%rdi)
; SKX-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/pr34177.ll b/llvm/test/CodeGen/X86/pr34177.ll
index 29922c2ac1a716..6dc4ee7a831809 100644
--- a/llvm/test/CodeGen/X86/pr34177.ll
+++ b/llvm/test/CodeGen/X86/pr34177.ll
@@ -49,20 +49,35 @@ define void @test(<4 x i64> %a, <4 x x86_fp80> %b, ptr %c) local_unnamed_addr {
; AVX512VL-LABEL: test:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
+<<<<<<< HEAD
; AVX512VL-NEXT: kshiftrb $2, %k0, %k1
; AVX512VL-NEXT: kmovd %k0, %eax
; AVX512VL-NEXT: testb $2, %al
+=======
+; AVX512VL-NEXT: kshiftrb $1, %k0, %k1
+; AVX512VL-NEXT: kshiftrb $2, %k0, %k2
+; AVX512VL-NEXT: kmovd %k0, %eax
+; AVX512VL-NEXT: testb $1, %al
+>>>>>>> parent of 239ab16ec121 ([X86] combineCMP - attempt to simplify KSHIFTR mask element extractions when just comparing against zero)
; AVX512VL-NEXT: fld1
; AVX512VL-NEXT: fldz
; AVX512VL-NEXT: fld %st(0)
; AVX512VL-NEXT: fcmovne %st(2), %st
+; AVX512VL-NEXT: kmovd %k1, %eax
; AVX512VL-NEXT: testb $1, %al
; AVX512VL-NEXT: fld %st(1)
; AVX512VL-NEXT: fcmovne %st(3), %st
+<<<<<<< HEAD
; AVX512VL-NEXT: kmovd %k1, %eax
; AVX512VL-NEXT: testb $2, %al
+=======
+; AVX512VL-NEXT: kshiftrb $1, %k2, %k0
+; AVX512VL-NEXT: kmovd %k0, %eax
+; AVX512VL-NEXT: testb $1, %al
+>>>>>>> parent of 239ab16ec121 ([X86] combineCMP - attempt to simplify KSHIFTR mask element extractions when just comparing against zero)
; AVX512VL-NEXT: fld %st(2)
; AVX512VL-NEXT: fcmovne %st(4), %st
+; AVX512VL-NEXT: kmovd %k2, %eax
; AVX512VL-NEXT: testb $1, %al
; AVX512VL-NEXT: fxch %st(3)
; AVX512VL-NEXT: fcmovne %st(4), %st
@@ -77,10 +92,10 @@ define void @test(<4 x i64> %a, <4 x x86_fp80> %b, ptr %c) local_unnamed_addr {
; AVX512VL-NEXT: fstpt 10(%rdi)
; AVX512VL-NEXT: fxch %st(1)
; AVX512VL-NEXT: fadd %st, %st(0)
-; AVX512VL-NEXT: fstpt 20(%rdi)
-; AVX512VL-NEXT: fadd %st, %st(0)
; AVX512VL-NEXT: fstpt (%rdi)
; AVX512VL-NEXT: fadd %st, %st(0)
+; AVX512VL-NEXT: fstpt 20(%rdi)
+; AVX512VL-NEXT: fadd %st, %st(0)
; AVX512VL-NEXT: fstpt 60(%rdi)
; AVX512VL-NEXT: fadd %st, %st(0)
; AVX512VL-NEXT: fstpt 40(%rdi)
More information about the llvm-commits
mailing list