[llvm] [SDAG][NVPTX] Add TLI check for preferring custom FP_TO_SINT operations to FP_TO_UINT (PR #132470)
Kevin McAfee via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 25 12:49:58 PDT 2025
https://github.com/kalxr updated https://github.com/llvm/llvm-project/pull/132470
>From 5b20597778639196f35cb1ecf4c27909ac6912ad Mon Sep 17 00:00:00 2001
From: Kevin McAfee <kmcafee at nvidia.com>
Date: Fri, 21 Mar 2025 20:05:02 +0000
Subject: [PATCH 1/2] [SDAG][NVPTX] Add TLI check for preferring custom
FP_TO_SINT operations to FP_TO_UINT
---
llvm/include/llvm/CodeGen/TargetLowering.h | 6 +
.../SelectionDAG/LegalizeIntegerTypes.cpp | 25 +++-
llvm/lib/Target/NVPTX/NVPTXISelLowering.h | 4 +
llvm/test/CodeGen/NVPTX/convert-fp-i8.ll | 134 ++++++++++++++++++
4 files changed, 164 insertions(+), 5 deletions(-)
create mode 100644 llvm/test/CodeGen/NVPTX/convert-fp-i8.ll
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 053e9d14dc2f7..af33210347eba 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -3464,6 +3464,12 @@ class TargetLoweringBase {
return false;
}
+ // Is it preferable to legalize FP types to SINT instead of UINT if both SINT
+ // and UINT are custom.
+ virtual bool preferPromoteFPToCustomSINTOverCustomUINT() const {
+ return true;
+ }
+
/// Create the IR node for the given complex deinterleaving operation.
/// If one cannot be created using all the given inputs, nullptr should be
/// returned.
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 204b323d7084a..3c5719bf63b77 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -853,21 +853,36 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) {
// If we're promoting a UINT to a larger size and the larger FP_TO_UINT is
// not Legal, check to see if we can use FP_TO_SINT instead. (If both UINT
- // and SINT conversions are Custom, there is no way to tell which is
- // preferable. We choose SINT because that's the right thing on PPC.)
+ // and SINT conversions are Custom, we use a TLI call to check which is
+ // preferable.)
if (N->getOpcode() == ISD::FP_TO_UINT &&
!TLI.isOperationLegal(ISD::FP_TO_UINT, NVT) &&
- TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT))
+ (TLI.isOperationLegal(ISD::FP_TO_SINT, NVT) ||
+ (!TLI.isOperationCustom(ISD::FP_TO_UINT, NVT) &&
+ TLI.isOperationCustom(ISD::FP_TO_SINT, NVT)) ||
+ (TLI.isOperationCustom(ISD::FP_TO_SINT, NVT) &&
+ TLI.isOperationCustom(ISD::FP_TO_UINT, NVT) &&
+ TLI.preferPromoteFPToCustomSINTOverCustomUINT())))
NewOpc = ISD::FP_TO_SINT;
if (N->getOpcode() == ISD::STRICT_FP_TO_UINT &&
!TLI.isOperationLegal(ISD::STRICT_FP_TO_UINT, NVT) &&
- TLI.isOperationLegalOrCustom(ISD::STRICT_FP_TO_SINT, NVT))
+ (TLI.isOperationLegal(ISD::STRICT_FP_TO_SINT, NVT) ||
+ (!TLI.isOperationCustom(ISD::STRICT_FP_TO_UINT, NVT) &&
+ TLI.isOperationCustom(ISD::STRICT_FP_TO_SINT, NVT)) ||
+ (TLI.isOperationCustom(ISD::STRICT_FP_TO_SINT, NVT) &&
+ TLI.isOperationCustom(ISD::STRICT_FP_TO_UINT, NVT) &&
+ TLI.preferPromoteFPToCustomSINTOverCustomUINT())))
NewOpc = ISD::STRICT_FP_TO_SINT;
if (N->getOpcode() == ISD::VP_FP_TO_UINT &&
!TLI.isOperationLegal(ISD::VP_FP_TO_UINT, NVT) &&
- TLI.isOperationLegalOrCustom(ISD::VP_FP_TO_SINT, NVT))
+ (TLI.isOperationLegal(ISD::VP_FP_TO_SINT, NVT) ||
+ (!TLI.isOperationCustom(ISD::VP_FP_TO_UINT, NVT) &&
+ TLI.isOperationCustom(ISD::VP_FP_TO_SINT, NVT)) ||
+ (TLI.isOperationCustom(ISD::VP_FP_TO_SINT, NVT) &&
+ TLI.isOperationCustom(ISD::VP_FP_TO_UINT, NVT) &&
+ TLI.preferPromoteFPToCustomSINTOverCustomUINT())))
NewOpc = ISD::VP_FP_TO_SINT;
SDValue Res;
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
index 39470be254efa..ba1d561b8df9e 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -282,6 +282,10 @@ class NVPTXTargetLowering : public TargetLowering {
Instruction *emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst,
AtomicOrdering Ord) const override;
+ bool preferPromoteFPToCustomSINTOverCustomUINT() const override {
+ return false;
+ }
+
private:
const NVPTXSubtarget &STI; // cache the subtarget here
mutable unsigned GlobalUniqueCallSite;
diff --git a/llvm/test/CodeGen/NVPTX/convert-fp-i8.ll b/llvm/test/CodeGen/NVPTX/convert-fp-i8.ll
new file mode 100644
index 0000000000000..93da39137afd8
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/convert-fp-i8.ll
@@ -0,0 +1,134 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_70 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_80 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_90 | FileCheck %s
+
+define i8 @cvt_u8_f32(float %x) {
+; CHECK-LABEL: cvt_u8_f32(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-NEXT: .reg .f32 %f<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.f32 %f1, [cvt_u8_f32_param_0];
+; CHECK-NEXT: cvt.rzi.u16.f32 %rs1, %f1;
+; CHECK-NEXT: cvt.u32.u16 %r1, %rs1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: ret;
+ %a = fptoui float %x to i8
+ ret i8 %a
+}
+
+define i8 @cvt_u8_f64(double %x) {
+; CHECK-LABEL: cvt_u8_f64(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-NEXT: .reg .f64 %fd<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.f64 %fd1, [cvt_u8_f64_param_0];
+; CHECK-NEXT: cvt.rzi.u16.f64 %rs1, %fd1;
+; CHECK-NEXT: cvt.u32.u16 %r1, %rs1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: ret;
+ %a = fptoui double %x to i8
+ ret i8 %a
+}
+
+define float @cvt_f32_i8(i8 %x) {
+; CHECK-LABEL: cvt_f32_i8(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-NEXT: .reg .f32 %f<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u8 %rs1, [cvt_f32_i8_param_0];
+; CHECK-NEXT: cvt.rn.f32.u16 %f1, %rs1;
+; CHECK-NEXT: st.param.f32 [func_retval0], %f1;
+; CHECK-NEXT: ret;
+ %a = uitofp i8 %x to float
+ ret float %a
+}
+
+define double @cvt_f64_i8(i8 %x) {
+; CHECK-LABEL: cvt_f64_i8(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-NEXT: .reg .f64 %fd<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u8 %rs1, [cvt_f64_i8_param_0];
+; CHECK-NEXT: cvt.rn.f64.u16 %fd1, %rs1;
+; CHECK-NEXT: st.param.f64 [func_retval0], %fd1;
+; CHECK-NEXT: ret;
+ %a = uitofp i8 %x to double
+ ret double %a
+}
+
+define float @cvt_f32_s8(i8 %x) {
+; CHECK-LABEL: cvt_f32_s8(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-NEXT: .reg .f32 %f<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.s8 %rs1, [cvt_f32_s8_param_0];
+; CHECK-NEXT: cvt.rn.f32.s16 %f1, %rs1;
+; CHECK-NEXT: st.param.f32 [func_retval0], %f1;
+; CHECK-NEXT: ret;
+ %a = sitofp i8 %x to float
+ ret float %a
+}
+
+define double @cvt_f64_s8(i8 %x) {
+; CHECK-LABEL: cvt_f64_s8(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-NEXT: .reg .f64 %fd<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.s8 %rs1, [cvt_f64_s8_param_0];
+; CHECK-NEXT: cvt.rn.f64.s16 %fd1, %rs1;
+; CHECK-NEXT: st.param.f64 [func_retval0], %fd1;
+; CHECK-NEXT: ret;
+ %a = sitofp i8 %x to double
+ ret double %a
+}
+
+define i8 @cvt_s8_f32(float %x) {
+; CHECK-LABEL: cvt_s8_f32(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-NEXT: .reg .b32 %r<3>;
+; CHECK-NEXT: .reg .f32 %f<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.f32 %f1, [cvt_s8_f32_param_0];
+; CHECK-NEXT: cvt.rzi.s16.f32 %rs1, %f1;
+; CHECK-NEXT: cvt.u32.u16 %r1, %rs1;
+; CHECK-NEXT: and.b32 %r2, %r1, 255;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
+; CHECK-NEXT: ret;
+ %a = fptosi float %x to i8
+ ret i8 %a
+}
+
+define i8 @cvt_s8_f64(double %x) {
+; CHECK-LABEL: cvt_s8_f64(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-NEXT: .reg .b32 %r<3>;
+; CHECK-NEXT: .reg .f64 %fd<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.f64 %fd1, [cvt_s8_f64_param_0];
+; CHECK-NEXT: cvt.rzi.s16.f64 %rs1, %fd1;
+; CHECK-NEXT: cvt.u32.u16 %r1, %rs1;
+; CHECK-NEXT: and.b32 %r2, %r1, 255;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
+; CHECK-NEXT: ret;
+ %a = fptosi double %x to i8
+ ret i8 %a
+}
>From 3d271a3c1b1e6fb89faa92429936c69f91fd5d1b Mon Sep 17 00:00:00 2001
From: Kevin McAfee <kmcafee at nvidia.com>
Date: Tue, 25 Mar 2025 19:49:29 +0000
Subject: [PATCH 2/2] Use TLI to get opcode
---
llvm/include/llvm/CodeGen/TargetLowering.h | 29 +++++++++++++--
.../SelectionDAG/LegalizeIntegerTypes.cpp | 37 +------------------
llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 27 ++++++++++++++
llvm/lib/Target/NVPTX/NVPTXISelLowering.h | 4 +-
4 files changed, 55 insertions(+), 42 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index af33210347eba..b64ab741f9c18 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -3464,10 +3464,31 @@ class TargetLoweringBase {
return false;
}
- // Is it preferable to legalize FP types to SINT instead of UINT if both SINT
- // and UINT are custom.
- virtual bool preferPromoteFPToCustomSINTOverCustomUINT() const {
- return true;
+ // Get the preferred opcode for FP_TO_XINT nodes.
+ // By default, this checks if the provided operation is an illegal FP_TO_UINT
+ // and if so, checks if FP_TO_SINT is legal or custom for use as a
+ // replacement. If both UINT and SINT conversions are Custom, we choose SINT
+ // by default because that's the right thing on PPC.
+ virtual unsigned getFPToXIntOpcode(unsigned Op, EVT FromVT, EVT ToVT) const {
+ if (isOperationLegal(Op, ToVT))
+ return Op;
+ switch (Op) {
+ case ISD::FP_TO_UINT:
+ if (isOperationLegalOrCustom(ISD::FP_TO_SINT, ToVT))
+ return ISD::FP_TO_SINT;
+ break;
+ case ISD::STRICT_FP_TO_UINT:
+ if (isOperationLegalOrCustom(ISD::STRICT_FP_TO_SINT, ToVT))
+ return ISD::STRICT_FP_TO_SINT;
+ break;
+ case ISD::VP_FP_TO_UINT:
+ if (isOperationLegalOrCustom(ISD::VP_FP_TO_SINT, ToVT))
+ return ISD::VP_FP_TO_SINT;
+ break;
+ default:
+ break;
+ }
+ return Op;
}
/// Create the IR node for the given complex deinterleaving operation.
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 3c5719bf63b77..d8f1b06e226bf 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -848,43 +848,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N) {
SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- unsigned NewOpc = N->getOpcode();
+ unsigned NewOpc =
+ TLI.getFPToXIntOpcode(N->getOpcode(), N->getValueType(0), NVT);
SDLoc dl(N);
- // If we're promoting a UINT to a larger size and the larger FP_TO_UINT is
- // not Legal, check to see if we can use FP_TO_SINT instead. (If both UINT
- // and SINT conversions are Custom, we use a TLI call to check which is
- // preferable.)
- if (N->getOpcode() == ISD::FP_TO_UINT &&
- !TLI.isOperationLegal(ISD::FP_TO_UINT, NVT) &&
- (TLI.isOperationLegal(ISD::FP_TO_SINT, NVT) ||
- (!TLI.isOperationCustom(ISD::FP_TO_UINT, NVT) &&
- TLI.isOperationCustom(ISD::FP_TO_SINT, NVT)) ||
- (TLI.isOperationCustom(ISD::FP_TO_SINT, NVT) &&
- TLI.isOperationCustom(ISD::FP_TO_UINT, NVT) &&
- TLI.preferPromoteFPToCustomSINTOverCustomUINT())))
- NewOpc = ISD::FP_TO_SINT;
-
- if (N->getOpcode() == ISD::STRICT_FP_TO_UINT &&
- !TLI.isOperationLegal(ISD::STRICT_FP_TO_UINT, NVT) &&
- (TLI.isOperationLegal(ISD::STRICT_FP_TO_SINT, NVT) ||
- (!TLI.isOperationCustom(ISD::STRICT_FP_TO_UINT, NVT) &&
- TLI.isOperationCustom(ISD::STRICT_FP_TO_SINT, NVT)) ||
- (TLI.isOperationCustom(ISD::STRICT_FP_TO_SINT, NVT) &&
- TLI.isOperationCustom(ISD::STRICT_FP_TO_UINT, NVT) &&
- TLI.preferPromoteFPToCustomSINTOverCustomUINT())))
- NewOpc = ISD::STRICT_FP_TO_SINT;
-
- if (N->getOpcode() == ISD::VP_FP_TO_UINT &&
- !TLI.isOperationLegal(ISD::VP_FP_TO_UINT, NVT) &&
- (TLI.isOperationLegal(ISD::VP_FP_TO_SINT, NVT) ||
- (!TLI.isOperationCustom(ISD::VP_FP_TO_UINT, NVT) &&
- TLI.isOperationCustom(ISD::VP_FP_TO_SINT, NVT)) ||
- (TLI.isOperationCustom(ISD::VP_FP_TO_SINT, NVT) &&
- TLI.isOperationCustom(ISD::VP_FP_TO_UINT, NVT) &&
- TLI.preferPromoteFPToCustomSINTOverCustomUINT())))
- NewOpc = ISD::VP_FP_TO_SINT;
-
SDValue Res;
if (N->isStrictFPOpcode()) {
Res = DAG.getNode(NewOpc, dl, {NVT, MVT::Other},
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 7b70cf0eaaa8a..5e3e5a8612acb 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -6179,6 +6179,33 @@ Instruction *NVPTXTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
return nullptr;
}
+// Rather than default to SINT when both UINT and SINT are custom, we only
+// change the opcode when UINT is not legal and SINT is. UINT is preferred when
+// both are custom since unsigned CVT instructions can lead to slightly better
+// SASS code with fewer instructions.
+unsigned NVPTXTargetLowering::getFPToXIntOpcode(unsigned Op, EVT FromVT,
+ EVT ToVT) const {
+ if (isOperationLegal(Op, ToVT))
+ return Op;
+ switch (Op) {
+ case ISD::FP_TO_UINT:
+ if (isOperationLegal(ISD::FP_TO_SINT, ToVT))
+ return ISD::FP_TO_SINT;
+ break;
+ case ISD::STRICT_FP_TO_UINT:
+ if (isOperationLegal(ISD::STRICT_FP_TO_SINT, ToVT))
+ return ISD::STRICT_FP_TO_SINT;
+ break;
+ case ISD::VP_FP_TO_UINT:
+ if (isOperationLegal(ISD::VP_FP_TO_SINT, ToVT))
+ return ISD::VP_FP_TO_SINT;
+ break;
+ default:
+ break;
+ }
+ return Op;
+}
+
// Pin NVPTXTargetObjectFile's vtables to this file.
NVPTXTargetObjectFile::~NVPTXTargetObjectFile() = default;
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
index ba1d561b8df9e..4b5edfd7c2fd3 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -282,9 +282,7 @@ class NVPTXTargetLowering : public TargetLowering {
Instruction *emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst,
AtomicOrdering Ord) const override;
- bool preferPromoteFPToCustomSINTOverCustomUINT() const override {
- return false;
- }
+ unsigned getFPToXIntOpcode(unsigned Op, EVT FromVT, EVT ToVT) const override;
private:
const NVPTXSubtarget &STI; // cache the subtarget here
More information about the llvm-commits
mailing list