[llvm] [X86] Remove restriction to pre-type legalization for combine in `scalarizeExtEltFP` (PR #117682)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 26 00:06:48 PST 2024
https://github.com/abhishek-kaushik22 created https://github.com/llvm/llvm-project/pull/117682
Remove the restriction to pre-type legalization from combine since it was already not being enforced for AVX512 where `v8i1` is a legal type but `i1` itself is not.
>From 230f5c657d0bc3e4c1706f235a9fcf61ab69c222 Mon Sep 17 00:00:00 2001
From: abhishek-kaushik22 <abhishek.kaushik at intel.com>
Date: Tue, 26 Nov 2024 13:13:17 +0530
Subject: [PATCH] [X86] Remove restriction to pre-type legalization for
scalarizeExtEltFP
Remove the restriction to pre-type legalization from combine since it was already not being enforced for AVX512 where v8i1 is a legal type but i1 itself is not.
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 34 +++++++++++++---------
llvm/test/CodeGen/X86/extractelement-fp.ll | 16 +++++++---
2 files changed, 33 insertions(+), 17 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index e4533570f75086..0a0c54492c2995 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -45842,7 +45842,8 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
/// Extracting a scalar FP value from vector element 0 is free, so extract each
/// operand first, then perform the math as a scalar op.
static SDValue scalarizeExtEltFP(SDNode *ExtElt, SelectionDAG &DAG,
- const X86Subtarget &Subtarget) {
+ const X86Subtarget &Subtarget,
+ TargetLowering::DAGCombinerInfo &DCI) {
assert(ExtElt->getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Expected extract");
SDValue Vec = ExtElt->getOperand(0);
SDValue Index = ExtElt->getOperand(1);
@@ -45877,23 +45878,30 @@ static SDValue scalarizeExtEltFP(SDNode *ExtElt, SelectionDAG &DAG,
// Vector FP selects don't fit the pattern of FP math ops (because the
// condition has a different type and we have to change the opcode), so deal
// with those here.
- // FIXME: This is restricted to pre type legalization by ensuring the setcc
- // has i1 elements. If we loosen this we need to convert vector bool to a
- // scalar bool.
if (Vec.getOpcode() == ISD::VSELECT &&
Vec.getOperand(0).getOpcode() == ISD::SETCC &&
Vec.getOperand(0).getValueType().getScalarType() == MVT::i1 &&
Vec.getOperand(0).getOperand(0).getValueType() == VecVT) {
// ext (sel Cond, X, Y), 0 --> sel (ext Cond, 0), (ext X, 0), (ext Y, 0)
SDLoc DL(ExtElt);
- SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
- Vec.getOperand(0).getValueType().getScalarType(),
- Vec.getOperand(0), Index);
- SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
- Vec.getOperand(1), Index);
- SDValue Ext2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
- Vec.getOperand(2), Index);
- return DAG.getNode(ISD::SELECT, DL, VT, Ext0, Ext1, Ext2);
+ bool AfterLegalize = DCI.getDAGCombineLevel() >= llvm::AfterLegalizeTypes;
+ EVT VecOperandType = Vec.getOperand(0).getValueType();
+ EVT LegalType = DAG.getTargetLoweringInfo().getTypeToTransformTo(
+ *DAG.getContext(), VecOperandType.getScalarType());
+ // The second condition checks that we don't create an invalid extract e.g
+ // 32 bit extract from a v*i64. This will cause a crash on 32-bit machines.
+ if (!AfterLegalize ||
+ LegalType.getSizeInBits() >= VecOperandType.getScalarSizeInBits()) {
+ SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+ AfterLegalize ? LegalType
+ : VecOperandType.getScalarType(),
+ Vec.getOperand(0), Index);
+ SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
+ Vec.getOperand(1), Index);
+ SDValue Ext2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
+ Vec.getOperand(2), Index);
+ return DAG.getNode(ISD::SELECT, DL, VT, Ext0, Ext1, Ext2);
+ }
}
// TODO: This switch could include FNEG and the x86-specific FP logic ops
@@ -46242,7 +46250,7 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
if (SDValue V = combineArithReduction(N, DAG, Subtarget))
return V;
- if (SDValue V = scalarizeExtEltFP(N, DAG, Subtarget))
+ if (SDValue V = scalarizeExtEltFP(N, DAG, Subtarget, DCI))
return V;
if (CIdx)
diff --git a/llvm/test/CodeGen/X86/extractelement-fp.ll b/llvm/test/CodeGen/X86/extractelement-fp.ll
index 944f6bbfd0bfbe..967493d3ac608b 100644
--- a/llvm/test/CodeGen/X86/extractelement-fp.ll
+++ b/llvm/test/CodeGen/X86/extractelement-fp.ll
@@ -320,10 +320,18 @@ define <3 x double> @extvselectsetcc_crash(<2 x double> %x) {
; X64-LABEL: extvselectsetcc_crash:
; X64: # %bb.0:
; X64-NEXT: vcmpeqpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
-; X64-NEXT: vmovsd {{.*#+}} xmm2 = [1.0E+0,0.0E+0]
-; X64-NEXT: vandpd %xmm2, %xmm1, %xmm1
-; X64-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; X64-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,3,3]
+; X64-NEXT: vmovq %xmm1, %rax
+; X64-NEXT: testq %rax, %rax
+; X64-NEXT: jne .{{\.?LBB[0-9]+_[0-9]+}}
+; X64-NEXT: # %bb.2:
+; X64-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; X64-NEXT: jmp .{{\.?LBB[0-9]+_[0-9]+}}
+; X64-NEXT: .{{\.?LBB[0-9]+_[0-9]+}}:
+; X64-NEXT: vmovsd {{.*#+}} xmm1 = [1.0E+0,0.0E+0]
+; X64-NEXT: .{{\.?LBB[0-9]+_[0-9]+}}:
+; X64-NEXT: vshufpd $1, %xmm0, %xmm0, %xmm2 # xmm2 = xmm0[1,0]
+; X64-NEXT: vunpcklpd %xmm0, %xmm1, %xmm0 # xmm0 = xmm1[0],xmm0[0]
+; X64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; X64-NEXT: retq
;
; X86-LABEL: extvselectsetcc_crash:
More information about the llvm-commits
mailing list