[llvm] [X86] Remove LowerFCanonicalize and use generic expansion (PR #147877)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 10 00:59:29 PDT 2025
https://github.com/woruyu updated https://github.com/llvm/llvm-project/pull/147877
>From 125e737201c2d595b69ceead8ec933b99e250d2f Mon Sep 17 00:00:00 2001
From: woruyu <1214539920 at qq.com>
Date: Thu, 10 Jul 2025 11:37:58 +0800
Subject: [PATCH 1/2] [X86] Remove LowerFCanonicalize and use generic expansion
---
llvm/include/llvm/CodeGen/TargetLowering.h | 14 +++++
llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 25 +--------
.../SelectionDAG/LegalizeVectorOps.cpp | 9 ++++
.../CodeGen/SelectionDAG/TargetLowering.cpp | 20 +++++++
llvm/lib/Target/X86/X86ISelLowering.cpp | 53 +++++++------------
llvm/lib/Target/X86/X86ISelLowering.h | 2 +
6 files changed, 65 insertions(+), 58 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index fa46d296bf533..7d884299a3ba8 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -5013,6 +5013,10 @@ class LLVM_ABI TargetLowering : public TargetLoweringBase {
return DL.isLittleEndian();
}
+ virtual bool shouldExpandVectorFCANONICALIZEInVectorLegalizer() const {
+ return false;
+ }
+
/// Returns a 0 terminated array of registers that can be safely used as
/// scratch registers.
virtual const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const {
@@ -5675,6 +5679,16 @@ class LLVM_ABI TargetLowering : public TargetLoweringBase {
/// only the first Count elements of the vector are used.
SDValue expandVecReduce(SDNode *Node, SelectionDAG &DAG) const;
+ /// This implements llvm.canonicalize.f* by multiplication with 1.0, as
+ /// suggested in
+ /// https://llvm.org/docs/LangRef.html#llvm-canonicalize-intrinsic.
+ /// It uses strict_fp operations even outside a strict_fp context in order
+ /// to guarantee that the canonicalization is not optimized away by later
+ /// passes. The result chain introduced by that is intentionally ignored
+ /// since no ordering requirement is intended here.
+ SDValue expandFCanonicalizeWithStrictFmul(SDNode *Node, SDLoc DL,
+ SelectionDAG &DAG) const;
+
/// Expand a VECREDUCE_SEQ_* into an explicit ordered calculation.
SDValue expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const;
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 528136a55f14a..9877015c96269 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -3356,29 +3356,8 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
break;
}
case ISD::FCANONICALIZE: {
- // This implements llvm.canonicalize.f* by multiplication with 1.0, as
- // suggested in
- // https://llvm.org/docs/LangRef.html#llvm-canonicalize-intrinsic.
- // It uses strict_fp operations even outside a strict_fp context in order
- // to guarantee that the canonicalization is not optimized away by later
- // passes. The result chain introduced by that is intentionally ignored
- // since no ordering requirement is intended here.
-
- // Create strict multiplication by 1.0.
- SDValue Operand = Node->getOperand(0);
- EVT VT = Operand.getValueType();
- SDValue One = DAG.getConstantFP(1.0, dl, VT);
- SDValue Chain = DAG.getEntryNode();
- SDValue Mul = DAG.getNode(ISD::STRICT_FMUL, dl, {VT, MVT::Other},
- {Chain, Operand, One});
-
- // Propagate existing flags on canonicalize, and additionally set
- // NoFPExcept.
- SDNodeFlags CanonicalizeFlags = Node->getFlags();
- CanonicalizeFlags.setNoFPExcept(true);
- Mul->setFlags(CanonicalizeFlags);
-
- Results.push_back(Mul);
+ SDValue Result = TLI.expandFCanonicalizeWithStrictFmul(Node, dl, DAG);
+ Results.push_back(Result);
break;
}
case ISD::SIGN_EXTEND_INREG: {
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index f908a66128ec8..fccd1cb61b7d5 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -1309,6 +1309,15 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
return;
}
break;
+ case ISD::FCANONICALIZE: {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLI.shouldExpandVectorFCANONICALIZEInVectorLegalizer()) {
+ SDLoc dl(Node);
+ SDValue Result = TLI.expandFCanonicalizeWithStrictFmul(Node, dl, DAG);
+ Results.push_back(Result);
+ return;
+ }
+ }
}
SDValue Unrolled = DAG.UnrollVectorOp(Node);
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index b90c80002151f..47783e27647e6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -11580,6 +11580,26 @@ SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
return Res;
}
+SDValue
+TargetLowering::expandFCanonicalizeWithStrictFmul(SDNode *Node, SDLoc DL,
+ SelectionDAG &DAG) const {
+ // Create strict multiplication by 1.0.
+ SDValue Operand = Node->getOperand(0);
+ EVT VT = Operand.getValueType();
+ SDValue One = DAG.getConstantFP(1.0, DL, VT);
+ SDValue Chain = DAG.getEntryNode();
+
+ SDValue Mul = DAG.getNode(ISD::STRICT_FMUL, DL, {VT, MVT::Other},
+ {Chain, Operand, One});
+
+ // Propagate existing flags on canonicalize, and additionally set NoFPExcept.
+ SDNodeFlags Flags = Node->getFlags();
+ Flags.setNoFPExcept(true);
+ Mul->setFlags(Flags);
+
+ return Mul;
+}
+
SDValue TargetLowering::expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const {
SDLoc dl(Node);
SDValue AccOp = Node->getOperand(0);
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 347ba1262b66b..07457095cdf6c 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -316,8 +316,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom);
setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom);
}
- setOperationAction(ISD::FCANONICALIZE, MVT::f32, Custom);
- setOperationAction(ISD::FCANONICALIZE, MVT::f64, Custom);
if (Subtarget.is64Bit()) {
setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Custom);
@@ -348,9 +346,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// TODO: when we have SSE, these could be more efficient, by using movd/movq.
if (!Subtarget.hasSSE2()) {
setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
- setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
- setOperationAction(ISD::FCANONICALIZE, MVT::f32, Custom);
- setOperationAction(ISD::FCANONICALIZE, MVT::f64, Custom);
+ setOperationAction(ISD::BITCAST, MVT::i32, Expand);
if (Subtarget.is64Bit()) {
setOperationAction(ISD::BITCAST , MVT::f64 , Expand);
// Without SSE, i64->f64 goes through memory.
@@ -716,7 +712,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::STRICT_FROUNDEVEN, MVT::f16, Promote);
setOperationAction(ISD::STRICT_FTRUNC, MVT::f16, Promote);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
- setOperationAction(ISD::FCANONICALIZE, MVT::f16, Custom);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Custom);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Custom);
setOperationAction(ISD::LRINT, MVT::f16, Expand);
@@ -871,7 +866,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::STRICT_FMUL , MVT::f80, Legal);
setOperationAction(ISD::STRICT_FDIV , MVT::f80, Legal);
setOperationAction(ISD::STRICT_FSQRT , MVT::f80, Legal);
- setOperationAction(ISD::FCANONICALIZE , MVT::f80, Custom);
+ setOperationAction(ISD::FCANONICALIZE, MVT::f80, Expand);
if (isTypeLegal(MVT::f16)) {
setOperationAction(ISD::FP_EXTEND, MVT::f80, Custom);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f80, Custom);
@@ -934,7 +929,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (isTypeLegal(MVT::f80)) {
setOperationAction(ISD::FP_ROUND, MVT::f80, Custom);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Custom);
- setOperationAction(ISD::FCANONICALIZE, MVT::f80, Custom);
+ setOperationAction(ISD::FCANONICALIZE, MVT::f80, Expand);
}
setOperationAction(ISD::SETCC, MVT::f128, Custom);
@@ -1070,11 +1065,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::VSELECT, MVT::v4f32, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
- setOperationAction(ISD::FCANONICALIZE, MVT::v4f32, Custom);
+ setOperationAction(ISD::FCANONICALIZE, MVT::v4f32, Expand);
setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
setOperationAction(ISD::STORE, MVT::v2f32, Custom);
- setOperationAction(ISD::FCANONICALIZE, MVT::v2f32, Custom);
+ setOperationAction(ISD::FCANONICALIZE, MVT::v2f32, Expand);
setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
@@ -1137,7 +1132,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::UMULO, MVT::v2i32, Custom);
setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
- setOperationAction(ISD::FCANONICALIZE, MVT::v2f64, Custom);
+ setOperationAction(ISD::FCANONICALIZE, MVT::v2f64, Expand);
setOperationAction(ISD::FABS, MVT::v2f64, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Custom);
@@ -1473,7 +1468,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FMINIMUM, VT, Custom);
setOperationAction(ISD::FMAXIMUMNUM, VT, Custom);
setOperationAction(ISD::FMINIMUMNUM, VT, Custom);
- setOperationAction(ISD::FCANONICALIZE, VT, Custom);
+ setOperationAction(ISD::FCANONICALIZE, VT, Expand);
}
setOperationAction(ISD::LRINT, MVT::v8f32, Custom);
@@ -1741,9 +1736,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i1, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i1, Custom);
- setOperationAction(ISD::FCANONICALIZE, MVT::v8f16, Custom);
- setOperationAction(ISD::FCANONICALIZE, MVT::v16f16, Custom);
- setOperationAction(ISD::FCANONICALIZE, MVT::v32f16, Custom);
+ setOperationAction(ISD::FCANONICALIZE, MVT::v8f16, Expand);
+ setOperationAction(ISD::FCANONICALIZE, MVT::v16f16, Expand);
+ setOperationAction(ISD::FCANONICALIZE, MVT::v32f16, Expand);
// There is no byte sized k-register load or store without AVX512DQ.
if (!Subtarget.hasDQI()) {
@@ -1825,7 +1820,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FMA, VT, Legal);
setOperationAction(ISD::STRICT_FMA, VT, Legal);
setOperationAction(ISD::FCOPYSIGN, VT, Custom);
- setOperationAction(ISD::FCANONICALIZE, VT, Custom);
+ setOperationAction(ISD::FCANONICALIZE, VT, Expand);
}
setOperationAction(ISD::LRINT, MVT::v16f32,
Subtarget.hasDQI() ? Legal : Custom);
@@ -3318,6 +3313,13 @@ bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
return true;
}
+// X86 prefers to defer vector FCANONICALIZE to DAG legalization
+// to avoid scalarization during vector legalization.
+bool X86TargetLowering::shouldExpandVectorFCANONICALIZEInVectorLegalizer()
+ const {
+ return true;
+}
+
bool X86TargetLowering::reduceSelectOfFPConstantLoads(EVT CmpOpVT) const {
// If we are using XMM registers in the ABI and the condition of the select is
// a floating-point compare and we have blendv or conditional move, then it is
@@ -33426,24 +33428,6 @@ static SDValue LowerPREFETCH(SDValue Op, const X86Subtarget &Subtarget,
return Op;
}
-static SDValue LowerFCanonicalize(SDValue Op, SelectionDAG &DAG) {
- SDNode *N = Op.getNode();
- SDValue Operand = N->getOperand(0);
- EVT VT = Operand.getValueType();
- SDLoc dl(N);
-
- SDValue One = DAG.getConstantFP(1.0, dl, VT);
-
- // TODO: Fix Crash for bf16 when generating strict_fmul as it
- // leads to a error : SoftPromoteHalfResult #0: t11: bf16,ch = strict_fmul t0,
- // ConstantFP:bf16<APFloat(16256)>, t5 LLVM ERROR: Do not know how to soft
- // promote this operator's result!
- SDValue Chain = DAG.getEntryNode();
- SDValue StrictFmul = DAG.getNode(ISD::STRICT_FMUL, dl, {VT, MVT::Other},
- {Chain, Operand, One});
- return StrictFmul;
-}
-
static StringRef getInstrStrFromOpNo(const SmallVectorImpl<StringRef> &AsmStrs,
unsigned OpNo) {
const APInt Operand(32, OpNo);
@@ -33583,7 +33567,6 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SRL_PARTS: return LowerShiftParts(Op, DAG);
case ISD::FSHL:
case ISD::FSHR: return LowerFunnelShift(Op, Subtarget, DAG);
- case ISD::FCANONICALIZE: return LowerFCanonicalize(Op, DAG);
case ISD::STRICT_SINT_TO_FP:
case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
case ISD::STRICT_UINT_TO_FP:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 5cb6b3e493a32..d9c72b27b46fa 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1522,6 +1522,8 @@ namespace llvm {
bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
Type *Ty) const override;
+ bool shouldExpandVectorFCANONICALIZEInVectorLegalizer() const override;
+
bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const override;
bool convertSelectOfConstantsToMath(EVT VT) const override;
>From ac09933e1f579b2a6d78cd1b3ecd4ff11585c225 Mon Sep 17 00:00:00 2001
From: woruyu <1214539920 at qq.com>
Date: Thu, 10 Jul 2025 15:59:00 +0800
Subject: [PATCH 2/2] fix: remove target hook
---
llvm/include/llvm/CodeGen/TargetLowering.h | 4 ----
llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 10 ++++------
llvm/lib/Target/X86/X86ISelLowering.cpp | 7 -------
llvm/lib/Target/X86/X86ISelLowering.h | 2 --
4 files changed, 4 insertions(+), 19 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 7d884299a3ba8..67b1ec6cfdb49 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -5013,10 +5013,6 @@ class LLVM_ABI TargetLowering : public TargetLoweringBase {
return DL.isLittleEndian();
}
- virtual bool shouldExpandVectorFCANONICALIZEInVectorLegalizer() const {
- return false;
- }
-
/// Returns a 0 terminated array of registers that can be safely used as
/// scratch registers.
virtual const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const {
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index fccd1cb61b7d5..ce88fa1066d3c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -1311,12 +1311,10 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
break;
case ISD::FCANONICALIZE: {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (TLI.shouldExpandVectorFCANONICALIZEInVectorLegalizer()) {
- SDLoc dl(Node);
- SDValue Result = TLI.expandFCanonicalizeWithStrictFmul(Node, dl, DAG);
- Results.push_back(Result);
- return;
- }
+ SDLoc dl(Node);
+ SDValue Result = TLI.expandFCanonicalizeWithStrictFmul(Node, dl, DAG);
+ Results.push_back(Result);
+ return;
}
}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 07457095cdf6c..4279d2fec0ab2 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -3313,13 +3313,6 @@ bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
return true;
}
-// X86 prefers to defer vector FCANONICALIZE to DAG legalization
-// to avoid scalarization during vector legalization.
-bool X86TargetLowering::shouldExpandVectorFCANONICALIZEInVectorLegalizer()
- const {
- return true;
-}
-
bool X86TargetLowering::reduceSelectOfFPConstantLoads(EVT CmpOpVT) const {
// If we are using XMM registers in the ABI and the condition of the select is
// a floating-point compare and we have blendv or conditional move, then it is
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index d9c72b27b46fa..5cb6b3e493a32 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1522,8 +1522,6 @@ namespace llvm {
bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
Type *Ty) const override;
- bool shouldExpandVectorFCANONICALIZEInVectorLegalizer() const override;
-
bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const override;
bool convertSelectOfConstantsToMath(EVT VT) const override;
More information about the llvm-commits mailing list