[llvm] 15d48c5 - [X86][DAG] remove LowerFCanonicalize (#188127)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 1 06:34:10 PDT 2026
Author: Gergo Stomfai
Date: 2026-04-01T13:34:05Z
New Revision: 15d48c5bbe2189db1b79aca0b4f355d0c7d664b6
URL: https://github.com/llvm/llvm-project/commit/15d48c5bbe2189db1b79aca0b4f355d0c7d664b6
DIFF: https://github.com/llvm/llvm-project/commit/15d48c5bbe2189db1b79aca0b4f355d0c7d664b6.diff
LOG: [X86][DAG] remove LowerFCanonicalize (#188127)
Remove LowerFCanonicalize. Added fallback for cases when the scalar type also has its Custom lowering to avoid regressions on AMDGPU and SystemZ.
Fixes #143862
Added:
Modified:
llvm/include/llvm/CodeGen/TargetLowering.h
llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/SystemZ/canonicalize-vars.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 4d7b6ea0755e4..fbed0d5378db4 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -5632,6 +5632,11 @@ class LLVM_ABI TargetLowering : public TargetLoweringBase {
SDNodeFlags Flags, const SDLoc &DL,
SelectionDAG &DAG) const;
+ /// Expand FCANONICALIZE to FMUL with 1.
+ /// \param Node Node to expand
+ /// \returns The expansion result
+ SDValue expandFCANONICALIZE(SDNode *Node, SelectionDAG &DAG) const;
+
/// Expand CTPOP nodes. Expands vector/scalar CTPOP nodes,
/// vector nodes can only succeed if all operations are legal/custom.
/// \param N Node to expand
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index c9de7faf4f2ee..54d86dfbfa303 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -3783,26 +3783,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
break;
}
case ISD::FCANONICALIZE: {
- // This implements llvm.canonicalize.f* by multiplication with 1.0, as
- // suggested in
- // https://llvm.org/docs/LangRef.html#llvm-canonicalize-intrinsic.
- // It uses strict_fp operations even outside a strict_fp context in order
- // to guarantee that the canonicalization is not optimized away by later
- // passes. The result chain introduced by that is intentionally ignored
- // since no ordering requirement is intended here.
-
- // Create strict multiplication by 1.0.
- SDValue Operand = Node->getOperand(0);
- EVT VT = Operand.getValueType();
- SDValue One = DAG.getConstantFP(1.0, dl, VT);
- SDValue Chain = DAG.getEntryNode();
- // Propagate existing flags on canonicalize, and additionally set
- // NoFPExcept.
- SDNodeFlags CanonicalizeFlags = Node->getFlags();
- CanonicalizeFlags.setNoFPExcept(true);
- SDValue Mul = DAG.getNode(ISD::STRICT_FMUL, dl, {VT, MVT::Other},
- {Chain, Operand, One}, CanonicalizeFlags);
-
+ SDValue Mul = TLI.expandFCANONICALIZE(Node, DAG);
Results.push_back(Mul);
break;
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index b6e7c275bb3a7..46e9a783324f2 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -1076,6 +1076,20 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
return;
}
break;
+ case ISD::FCANONICALIZE: {
+ // If the scalar element type has a
+ // Legal/Custom FCANONICALIZE, don't
+ // mess with the vector, fall back.
+ EVT VT = Node->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
+ if (TLI.getOperationAction(ISD::FCANONICALIZE, EltVT.getSimpleVT()) !=
+ TargetLowering::Expand)
+ break;
+ // Otherwise canonicalize the whole vector.
+ SDValue Mul = TLI.expandFCANONICALIZE(Node, DAG);
+ Results.push_back(Mul);
+ return;
+ }
case ISD::FSUB:
ExpandFSUB(Node, Results);
return;
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 2b1b6c0ad636c..90ac63b147417 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8866,6 +8866,26 @@ void TargetLowering::expandShiftParts(SDNode *Node, SDValue &Lo, SDValue &Hi,
}
}
+SDValue TargetLowering::expandFCANONICALIZE(SDNode *Node,
+ SelectionDAG &DAG) const {
+ // This implements llvm.canonicalize.f* by multiplication with 1.0, as
+ // suggested in
+ // https://llvm.org/docs/LangRef.html#llvm-canonicalize-intrinsic.
+ // It uses strict_fp operations even outside a strict_fp context in order
+ // to guarantee that the canonicalization is not optimized away by later
+ // passes. The result chain introduced by that is intentionally ignored
+ // since no ordering requirement is intended here.
+ EVT VT = Node->getValueType(0);
+ SDLoc DL(Node);
+ SDNodeFlags Flags = Node->getFlags();
+ Flags.setNoFPExcept(true);
+ SDValue One = DAG.getConstantFP(1.0, DL, VT);
+ SDValue Mul =
+ DAG.getNode(ISD::STRICT_FMUL, DL, {VT, MVT::Other},
+ {DAG.getEntryNode(), Node->getOperand(0), One}, Flags);
+ return Mul;
+}
+
bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
SelectionDAG &DAG) const {
unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 32796c3e56781..e1a7876e30de0 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -315,8 +315,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom);
setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom);
}
- setOperationAction(ISD::FCANONICALIZE, MVT::f32, Custom);
- setOperationAction(ISD::FCANONICALIZE, MVT::f64, Custom);
if (Subtarget.is64Bit()) {
setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Custom);
@@ -346,8 +344,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (!Subtarget.hasSSE2()) {
setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
- setOperationAction(ISD::FCANONICALIZE, MVT::f32, Custom);
- setOperationAction(ISD::FCANONICALIZE, MVT::f64, Custom);
if (Subtarget.is64Bit()) {
setOperationAction(ISD::BITCAST , MVT::f64 , Expand);
// Without SSE, i64->f64 goes through memory.
@@ -716,7 +712,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::STRICT_FROUNDEVEN, MVT::f16, Promote);
setOperationAction(ISD::STRICT_FTRUNC, MVT::f16, Promote);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
- setOperationAction(ISD::FCANONICALIZE, MVT::f16, Custom);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Custom);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Custom);
@@ -879,7 +874,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::STRICT_FMUL , MVT::f80, Legal);
setOperationAction(ISD::STRICT_FDIV , MVT::f80, Legal);
setOperationAction(ISD::STRICT_FSQRT , MVT::f80, Legal);
- setOperationAction(ISD::FCANONICALIZE , MVT::f80, Custom);
+ setOperationAction(ISD::FCANONICALIZE, MVT::f80, Expand);
if (isTypeLegal(MVT::f16)) {
setOperationAction(ISD::FP_EXTEND, MVT::f80, Custom);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f80, Custom);
@@ -942,7 +937,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (isTypeLegal(MVT::f80)) {
setOperationAction(ISD::FP_ROUND, MVT::f80, Custom);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Custom);
- setOperationAction(ISD::FCANONICALIZE, MVT::f80, Custom);
+ setOperationAction(ISD::FCANONICALIZE, MVT::f80, Expand);
}
setOperationAction(ISD::SETCC, MVT::f128, Custom);
@@ -1078,11 +1073,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::VSELECT, MVT::v4f32, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
- setOperationAction(ISD::FCANONICALIZE, MVT::v4f32, Custom);
+ setOperationAction(ISD::FCANONICALIZE, MVT::v4f32, Expand);
setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
setOperationAction(ISD::STORE, MVT::v2f32, Custom);
- setOperationAction(ISD::FCANONICALIZE, MVT::v2f32, Custom);
+ setOperationAction(ISD::FCANONICALIZE, MVT::v2f32, Expand);
setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
@@ -1137,7 +1132,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::UMULO, MVT::v2i32, Custom);
setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
- setOperationAction(ISD::FCANONICALIZE, MVT::v2f64, Custom);
+ setOperationAction(ISD::FCANONICALIZE, MVT::v2f64, Expand);
setOperationAction(ISD::FABS, MVT::v2f64, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Custom);
@@ -1496,7 +1491,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FMINIMUM, VT, Custom);
setOperationAction(ISD::FMAXIMUMNUM, VT, Custom);
setOperationAction(ISD::FMINIMUMNUM, VT, Custom);
- setOperationAction(ISD::FCANONICALIZE, VT, Custom);
+ setOperationAction(ISD::FCANONICALIZE, VT, Expand);
}
setOperationAction(ISD::LRINT, MVT::v8f32, Custom);
@@ -1783,9 +1778,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i1, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i1, Custom);
- setOperationAction(ISD::FCANONICALIZE, MVT::v8f16, Custom);
- setOperationAction(ISD::FCANONICALIZE, MVT::v16f16, Custom);
- setOperationAction(ISD::FCANONICALIZE, MVT::v32f16, Custom);
+ setOperationAction(ISD::FCANONICALIZE, MVT::v8f16, Expand);
+ setOperationAction(ISD::FCANONICALIZE, MVT::v16f16, Expand);
+ setOperationAction(ISD::FCANONICALIZE, MVT::v32f16, Expand);
// There is no byte sized k-register load or store without AVX512DQ.
if (!Subtarget.hasDQI()) {
@@ -1867,7 +1862,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FMA, VT, Legal);
setOperationAction(ISD::STRICT_FMA, VT, Legal);
setOperationAction(ISD::FCOPYSIGN, VT, Custom);
- setOperationAction(ISD::FCANONICALIZE, VT, Custom);
+ setOperationAction(ISD::FCANONICALIZE, VT, Expand);
}
setOperationAction(ISD::LRINT, MVT::v16f32,
Subtarget.hasDQI() ? Legal : Custom);
@@ -34066,24 +34061,6 @@ static SDValue LowerPREFETCH(SDValue Op, const X86Subtarget &Subtarget,
return Op;
}
-static SDValue LowerFCanonicalize(SDValue Op, SelectionDAG &DAG) {
- SDNode *N = Op.getNode();
- SDValue Operand = N->getOperand(0);
- EVT VT = Operand.getValueType();
- SDLoc dl(N);
-
- SDValue One = DAG.getConstantFP(1.0, dl, VT);
-
- // TODO: Fix Crash for bf16 when generating strict_fmul as it
- // leads to a error : SoftPromoteHalfResult #0: t11: bf16,ch = strict_fmul t0,
- // ConstantFP:bf16<APFloat(16256)>, t5 LLVM ERROR: Do not know how to soft
- // promote this operator's result!
- SDValue Chain = DAG.getEntryNode();
- SDValue StrictFmul = DAG.getNode(ISD::STRICT_FMUL, dl, {VT, MVT::Other},
- {Chain, Operand, One});
- return StrictFmul;
-}
-
static StringRef getInstrStrFromOpNo(const SmallVectorImpl<StringRef> &AsmStrs,
unsigned OpNo) {
const APInt Operand(32, OpNo);
@@ -34225,7 +34202,6 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SRL_PARTS: return LowerShiftParts(Op, DAG);
case ISD::FSHL:
case ISD::FSHR: return LowerFunnelShift(Op, Subtarget, DAG);
- case ISD::FCANONICALIZE: return LowerFCanonicalize(Op, DAG);
case ISD::STRICT_SINT_TO_FP:
case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
case ISD::STRICT_UINT_TO_FP:
diff --git a/llvm/test/CodeGen/SystemZ/canonicalize-vars.ll b/llvm/test/CodeGen/SystemZ/canonicalize-vars.ll
index d0f3414e89497..e6659d385ae5f 100644
--- a/llvm/test/CodeGen/SystemZ/canonicalize-vars.ll
+++ b/llvm/test/CodeGen/SystemZ/canonicalize-vars.ll
@@ -205,17 +205,8 @@ define <8 x half> @canonicalize_v8f16(<8 x half> %a) nounwind {
define <4 x float> @canonicalize_v4f32(<4 x float> %a) {
; Z16-LABEL: canonicalize_v4f32:
; Z16: # %bb.0:
-; Z16-NEXT: vrepf %v0, %v24, 3
-; Z16-NEXT: vgmf %v1, 2, 8
-; Z16-NEXT: vrepf %v2, %v24, 2
-; Z16-NEXT: meebr %f0, %f1
-; Z16-NEXT: meebr %f2, %f1
-; Z16-NEXT: vrepf %v3, %v24, 1
-; Z16-NEXT: vmrhf %v0, %v2, %v0
-; Z16-NEXT: wfmsb %f2, %v24, %f1
-; Z16-NEXT: wfmsb %f1, %f3, %f1
-; Z16-NEXT: vmrhf %v1, %v2, %v1
-; Z16-NEXT: vmrhg %v24, %v1, %v0
+; Z16-NEXT: vgmf %v0, 2, 8
+; Z16-NEXT: vfmsb %v24, %v24, %v0
; Z16-NEXT: br %r14
%canonicalized = call <4 x float> @llvm.canonicalize.v4f32(<4 x float> %a)
ret <4 x float> %canonicalized
@@ -225,14 +216,8 @@ define <4 x double> @canonicalize_v4f64(<4 x double> %a) {
; Z16-LABEL: canonicalize_v4f64:
; Z16: # %bb.0:
; Z16-NEXT: vgmg %v0, 2, 11
-; Z16-NEXT: vrepg %v2, %v24, 1
-; Z16-NEXT: wfmdb %f1, %v24, %f0
-; Z16-NEXT: mdbr %f2, %f0
-; Z16-NEXT: vmrhg %v24, %v1, %v2
-; Z16-NEXT: vrepg %v2, %v26, 1
-; Z16-NEXT: wfmdb %f1, %v26, %f0
-; Z16-NEXT: wfmdb %f0, %f2, %f0
-; Z16-NEXT: vmrhg %v26, %v1, %v0
+; Z16-NEXT: vfmdb %v24, %v24, %v0
+; Z16-NEXT: vfmdb %v26, %v26, %v0
; Z16-NEXT: br %r14
%canonicalized = call <4 x double> @llvm.canonicalize.v4f64(<4 x double> %a)
ret <4 x double> %canonicalized
@@ -358,17 +343,8 @@ define void @canonicalize_ptr_v4f32(ptr %out) {
; Z16-LABEL: canonicalize_ptr_v4f32:
; Z16: # %bb.0:
; Z16-NEXT: vl %v0, 0(%r2), 3
-; Z16-NEXT: vrepf %v1, %v0, 3
-; Z16-NEXT: vgmf %v2, 2, 8
-; Z16-NEXT: vrepf %v3, %v0, 2
-; Z16-NEXT: meebr %f1, %f2
-; Z16-NEXT: meebr %f3, %f2
-; Z16-NEXT: vmrhf %v1, %v3, %v1
-; Z16-NEXT: wfmsb %f3, %f0, %f2
-; Z16-NEXT: vrepf %v0, %v0, 1
-; Z16-NEXT: meebr %f0, %f2
-; Z16-NEXT: vmrhf %v0, %v3, %v0
-; Z16-NEXT: vmrhg %v0, %v0, %v1
+; Z16-NEXT: vgmf %v1, 2, 8
+; Z16-NEXT: vfmsb %v0, %v0, %v1
; Z16-NEXT: vst %v0, 0(%r2), 3
; Z16-NEXT: br %r14
%val = load <4 x float>, ptr %out
@@ -380,17 +356,11 @@ define void @canonicalize_ptr_v4f32(ptr %out) {
define void @canonicalize_ptr_v4f64(ptr %out) {
; Z16-LABEL: canonicalize_ptr_v4f64:
; Z16: # %bb.0:
+; Z16-NEXT: vl %v0, 0(%r2), 4
; Z16-NEXT: vl %v1, 16(%r2), 4
; Z16-NEXT: vgmg %v2, 2, 11
-; Z16-NEXT: wfmdb %f3, %f1, %f2
-; Z16-NEXT: vrepg %v1, %v1, 1
-; Z16-NEXT: mdbr %f1, %f2
-; Z16-NEXT: vl %v0, 0(%r2), 4
-; Z16-NEXT: vmrhg %v1, %v3, %v1
-; Z16-NEXT: wfmdb %f3, %f0, %f2
-; Z16-NEXT: vrepg %v0, %v0, 1
-; Z16-NEXT: mdbr %f0, %f2
-; Z16-NEXT: vmrhg %v0, %v3, %v0
+; Z16-NEXT: vfmdb %v1, %v1, %v2
+; Z16-NEXT: vfmdb %v0, %v0, %v2
; Z16-NEXT: vst %v0, 0(%r2), 4
; Z16-NEXT: vst %v1, 16(%r2), 4
; Z16-NEXT: br %r14
More information about the llvm-commits
mailing list