[llvm] 9bdf683 - [X86] Enforce strict pre-legalization to combine in scalarizeExtEltFP (#117681)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 27 16:19:13 PST 2024
Author: abhishek-kaushik22
Date: 2024-11-28T08:19:10+08:00
New Revision: 9bdf683ba6cd9ad07667513d264a2bc02d969186
URL: https://github.com/llvm/llvm-project/commit/9bdf683ba6cd9ad07667513d264a2bc02d969186
DIFF: https://github.com/llvm/llvm-project/commit/9bdf683ba6cd9ad07667513d264a2bc02d969186.diff
LOG: [X86] Enforce strict pre-legalization to combine in scalarizeExtEltFP (#117681)
Use a `DCI` object to actually check the DAG combine level instead of
using the type `i1` because this assumption fails on AVX512 where we
have types like `v8i1` after legalization.
Closes #117684
Added:
llvm/test/CodeGen/X86/extract-vselect-setcc.ll
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 2e065a938a3109..d490de06590f78 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -45857,7 +45857,8 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
/// Extracting a scalar FP value from vector element 0 is free, so extract each
/// operand first, then perform the math as a scalar op.
static SDValue scalarizeExtEltFP(SDNode *ExtElt, SelectionDAG &DAG,
- const X86Subtarget &Subtarget) {
+ const X86Subtarget &Subtarget,
+ TargetLowering::DAGCombinerInfo &DCI) {
assert(ExtElt->getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Expected extract");
SDValue Vec = ExtElt->getOperand(0);
SDValue Index = ExtElt->getOperand(1);
@@ -45892,13 +45893,13 @@ static SDValue scalarizeExtEltFP(SDNode *ExtElt, SelectionDAG &DAG,
// Vector FP selects don't fit the pattern of FP math ops (because the
// condition has a different type and we have to change the opcode), so deal
// with those here.
- // FIXME: This is restricted to pre type legalization by ensuring the setcc
- // has i1 elements. If we loosen this we need to convert vector bool to a
- // scalar bool.
- if (Vec.getOpcode() == ISD::VSELECT &&
+ // FIXME: This is restricted to pre type legalization. If we loosen this we
+ // need to convert vector bool to a scalar bool.
+ if (DCI.isBeforeLegalize() && Vec.getOpcode() == ISD::VSELECT &&
Vec.getOperand(0).getOpcode() == ISD::SETCC &&
- Vec.getOperand(0).getValueType().getScalarType() == MVT::i1 &&
Vec.getOperand(0).getOperand(0).getValueType() == VecVT) {
+ assert(Vec.getOperand(0).getValueType().getScalarType() == MVT::i1 &&
+ "Unexpected cond type for combine");
// ext (sel Cond, X, Y), 0 --> sel (ext Cond, 0), (ext X, 0), (ext Y, 0)
SDLoc DL(ExtElt);
SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
@@ -46257,7 +46258,7 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
if (SDValue V = combineArithReduction(N, DAG, Subtarget))
return V;
- if (SDValue V = scalarizeExtEltFP(N, DAG, Subtarget))
+ if (SDValue V = scalarizeExtEltFP(N, DAG, Subtarget, DCI))
return V;
if (CIdx)
diff --git a/llvm/test/CodeGen/X86/extract-vselect-setcc.ll b/llvm/test/CodeGen/X86/extract-vselect-setcc.ll
new file mode 100644
index 00000000000000..81ab104cab283a
--- /dev/null
+++ b/llvm/test/CodeGen/X86/extract-vselect-setcc.ll
@@ -0,0 +1,39 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=x86_64 | FileCheck %s
+
+define void @PR117684(i1 %cond, <8 x float> %vec, ptr %ptr1, ptr %ptr2) #0 {
+; CHECK-LABEL: PR117684:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vcmpnltps %ymm1, %ymm0, %k1
+; CHECK-NEXT: vbroadcastss {{.*#+}} xmm0 = [NaN,NaN,NaN,NaN]
+; CHECK-NEXT: vinsertf32x4 $0, %xmm0, %ymm0, %ymm0 {%k1} {z}
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vmulss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
+; CHECK-NEXT: vbroadcastss %xmm2, %ymm2
+; CHECK-NEXT: testb $1, %dil
+; CHECK-NEXT: cmoveq %rdx, %rsi
+; CHECK-NEXT: vmovups %ymm2, (%rsi)
+; CHECK-NEXT: vmulss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vbroadcastss %xmm0, %ymm0
+; CHECK-NEXT: vmovups %ymm0, (%rdx)
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %cmp = fcmp olt <8 x float> %vec, zeroinitializer
+ %sel1 = select <8 x i1> %cmp, <8 x float> zeroinitializer, <8 x float>
+ <float 0x7FF8000000000000, float 0x7FF8000000000000, float 0x7FF8000000000000, float 0x7FF8000000000000,
+ float 0x7FF8000000000000, float 0x7FF8000000000000, float 0x7FF8000000000000, float 0x7FF8000000000000>
+ %fmul1 = fmul <8 x float> zeroinitializer, %sel1
+ %shuffle = shufflevector <8 x float> %fmul1, <8 x float> zeroinitializer, <8 x i32> zeroinitializer
+ %fmul2 = fmul <8 x float> %shuffle,
+ <float 0x7FF8000000000000, float 0x7FF8000000000000, float 0x7FF8000000000000, float 0x7FF8000000000000,
+ float 0x7FF8000000000000, float 0x7FF8000000000000, float 0x7FF8000000000000, float 0x7FF8000000000000>
+ %sel2 = select i1 %cond, ptr %ptr1, ptr %ptr2
+ store <8 x float> %fmul2, ptr %sel2, align 4
+ %fmul3 = fmul <8 x float> %shuffle, zeroinitializer
+ store <8 x float> %fmul3, ptr %ptr2, align 4
+ ret void
+}
+
+attributes #0 = { "target-cpu"="skylake-avx512" }
More information about the llvm-commits
mailing list