[llvm] [NVPTX] Draft aidan (PR #119086)
Justin Fargnoli via llvm-commits
llvm-commits at lists.llvm.org
Sat Dec 7 10:35:35 PST 2024
https://github.com/justinfargnoli updated https://github.com/llvm/llvm-project/pull/119086
>From 380f0bdb2779f2c73b05c74dc4953d284ae5e8d9 Mon Sep 17 00:00:00 2001
From: justinfargnoli <justinfargnoli at gmail.com>
Date: Sat, 7 Dec 2024 10:21:40 -0800
Subject: [PATCH 1/2] [NVPTX] Draft aidan
---
llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 10 +-
llvm/test/CodeGen/NVPTX/bf16-instructions.ll | 130 ++++---------------
2 files changed, 26 insertions(+), 114 deletions(-)
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 8812136733fb24..4c50a2e677d3f2 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -809,13 +809,11 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
// bf16 conversions.
if (STI.getSmVersion() < 90 || STI.getPTXVersion() < 78) {
for (MVT VT : {MVT::i1, MVT::i16, MVT::i32, MVT::i64}) {
- setOperationAction(
- {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT},
- VT, Custom);
+ setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Custom);
}
- setOperationAction(
- {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT},
- MVT::bf16, Custom);
+ setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, MVT::bf16, Promote);
+ AddPromotedToType(ISD::FP_TO_SINT, MVT::bf16, MVT::f32);
+ AddPromotedToType(ISD::FP_TO_UINT, MVT::bf16, MVT::f32);
}
setOperationAction(ISD::FROUND, MVT::f16, Promote);
diff --git a/llvm/test/CodeGen/NVPTX/bf16-instructions.ll b/llvm/test/CodeGen/NVPTX/bf16-instructions.ll
index 95bca39c73ad73..03315f09e822ac 100644
--- a/llvm/test/CodeGen/NVPTX/bf16-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/bf16-instructions.ll
@@ -938,119 +938,33 @@ define <8 x float> @test_extload_bf16x8(ptr addrspace(3) noundef %arg) #0 {
}
define i16 @test_fptosi_i16(bfloat %a) {
-; SM70-LABEL: test_fptosi_i16(
-; SM70: {
-; SM70-NEXT: .reg .b16 %rs<2>;
-; SM70-NEXT: .reg .b32 %r<4>;
-; SM70-NEXT: .reg .f32 %f<2>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.u16 %r1, [test_fptosi_i16_param_0];
-; SM70-NEXT: shl.b32 %r2, %r1, 16;
-; SM70-NEXT: mov.b32 %f1, %r2;
-; SM70-NEXT: cvt.rzi.s16.f32 %rs1, %f1;
-; SM70-NEXT: cvt.u32.u16 %r3, %rs1;
-; SM70-NEXT: st.param.b32 [func_retval0+0], %r3;
-; SM70-NEXT: ret;
-;
-; SM80-LABEL: test_fptosi_i16(
-; SM80: {
-; SM80-NEXT: .reg .b16 %rs<3>;
-; SM80-NEXT: .reg .b32 %r<2>;
-; SM80-NEXT: .reg .f32 %f<2>;
-; SM80-EMPTY:
-; SM80-NEXT: // %bb.0:
-; SM80-NEXT: ld.param.b16 %rs1, [test_fptosi_i16_param_0];
-; SM80-NEXT: cvt.f32.bf16 %f1, %rs1;
-; SM80-NEXT: cvt.rzi.s16.f32 %rs2, %f1;
-; SM80-NEXT: cvt.u32.u16 %r1, %rs2;
-; SM80-NEXT: st.param.b32 [func_retval0+0], %r1;
-; SM80-NEXT: ret;
-;
-; SM80-FTZ-LABEL: test_fptosi_i16(
-; SM80-FTZ: {
-; SM80-FTZ-NEXT: .reg .b16 %rs<3>;
-; SM80-FTZ-NEXT: .reg .b32 %r<2>;
-; SM80-FTZ-NEXT: .reg .f32 %f<2>;
-; SM80-FTZ-EMPTY:
-; SM80-FTZ-NEXT: // %bb.0:
-; SM80-FTZ-NEXT: ld.param.b16 %rs1, [test_fptosi_i16_param_0];
-; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f1, %rs1;
-; SM80-FTZ-NEXT: cvt.rzi.ftz.s16.f32 %rs2, %f1;
-; SM80-FTZ-NEXT: cvt.u32.u16 %r1, %rs2;
-; SM80-FTZ-NEXT: st.param.b32 [func_retval0+0], %r1;
-; SM80-FTZ-NEXT: ret;
-;
-; SM90-LABEL: test_fptosi_i16(
-; SM90: {
-; SM90-NEXT: .reg .b16 %rs<3>;
-; SM90-NEXT: .reg .b32 %r<2>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b16 %rs1, [test_fptosi_i16_param_0];
-; SM90-NEXT: cvt.rzi.s16.bf16 %rs2, %rs1;
-; SM90-NEXT: cvt.u32.u16 %r1, %rs2;
-; SM90-NEXT: st.param.b32 [func_retval0+0], %r1;
-; SM90-NEXT: ret;
+; CHECK-LABEL: test_fptosi_i16(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b16 %rs1, [test_fptosi_i16_param_0];
+; CHECK-NEXT: cvt.rzi.s16.bf16 %rs2, %rs1;
+; CHECK-NEXT: cvt.u32.u16 %r1, %rs2;
+; CHECK-NEXT: st.param.b32 [func_retval0+0], %r1;
+; CHECK-NEXT: ret;
%r = fptosi bfloat %a to i16
ret i16 %r
}
define i16 @test_fptoui_i16(bfloat %a) {
-; SM70-LABEL: test_fptoui_i16(
-; SM70: {
-; SM70-NEXT: .reg .b16 %rs<2>;
-; SM70-NEXT: .reg .b32 %r<4>;
-; SM70-NEXT: .reg .f32 %f<2>;
-; SM70-EMPTY:
-; SM70-NEXT: // %bb.0:
-; SM70-NEXT: ld.param.u16 %r1, [test_fptoui_i16_param_0];
-; SM70-NEXT: shl.b32 %r2, %r1, 16;
-; SM70-NEXT: mov.b32 %f1, %r2;
-; SM70-NEXT: cvt.rzi.u16.f32 %rs1, %f1;
-; SM70-NEXT: cvt.u32.u16 %r3, %rs1;
-; SM70-NEXT: st.param.b32 [func_retval0+0], %r3;
-; SM70-NEXT: ret;
-;
-; SM80-LABEL: test_fptoui_i16(
-; SM80: {
-; SM80-NEXT: .reg .b16 %rs<3>;
-; SM80-NEXT: .reg .b32 %r<2>;
-; SM80-NEXT: .reg .f32 %f<2>;
-; SM80-EMPTY:
-; SM80-NEXT: // %bb.0:
-; SM80-NEXT: ld.param.b16 %rs1, [test_fptoui_i16_param_0];
-; SM80-NEXT: cvt.f32.bf16 %f1, %rs1;
-; SM80-NEXT: cvt.rzi.u16.f32 %rs2, %f1;
-; SM80-NEXT: cvt.u32.u16 %r1, %rs2;
-; SM80-NEXT: st.param.b32 [func_retval0+0], %r1;
-; SM80-NEXT: ret;
-;
-; SM80-FTZ-LABEL: test_fptoui_i16(
-; SM80-FTZ: {
-; SM80-FTZ-NEXT: .reg .b16 %rs<3>;
-; SM80-FTZ-NEXT: .reg .b32 %r<2>;
-; SM80-FTZ-NEXT: .reg .f32 %f<2>;
-; SM80-FTZ-EMPTY:
-; SM80-FTZ-NEXT: // %bb.0:
-; SM80-FTZ-NEXT: ld.param.b16 %rs1, [test_fptoui_i16_param_0];
-; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f1, %rs1;
-; SM80-FTZ-NEXT: cvt.rzi.ftz.u16.f32 %rs2, %f1;
-; SM80-FTZ-NEXT: cvt.u32.u16 %r1, %rs2;
-; SM80-FTZ-NEXT: st.param.b32 [func_retval0+0], %r1;
-; SM80-FTZ-NEXT: ret;
-;
-; SM90-LABEL: test_fptoui_i16(
-; SM90: {
-; SM90-NEXT: .reg .b16 %rs<3>;
-; SM90-NEXT: .reg .b32 %r<2>;
-; SM90-EMPTY:
-; SM90-NEXT: // %bb.0:
-; SM90-NEXT: ld.param.b16 %rs1, [test_fptoui_i16_param_0];
-; SM90-NEXT: cvt.rzi.u16.bf16 %rs2, %rs1;
-; SM90-NEXT: cvt.u32.u16 %r1, %rs2;
-; SM90-NEXT: st.param.b32 [func_retval0+0], %r1;
-; SM90-NEXT: ret;
+; CHECK-LABEL: test_fptoui_i16(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b16 %rs1, [test_fptoui_i16_param_0];
+; CHECK-NEXT: cvt.rzi.u16.bf16 %rs2, %rs1;
+; CHECK-NEXT: cvt.u32.u16 %r1, %rs2;
+; CHECK-NEXT: st.param.b32 [func_retval0+0], %r1;
+; CHECK-NEXT: ret;
%r = fptoui bfloat %a to i16
ret i16 %r
}
>From fef30011dfc05646181a8d8069cc61d98c1ce657 Mon Sep 17 00:00:00 2001
From: justinfargnoli <justinfargnoli at gmail.com>
Date: Sat, 7 Dec 2024 10:35:25 -0800
Subject: [PATCH 2/2] Delete code
---
llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 18 ------------------
llvm/lib/Target/NVPTX/NVPTXISelLowering.h | 1 -
2 files changed, 19 deletions(-)
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 4c50a2e677d3f2..5d84611505f6ba 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -2702,21 +2702,6 @@ SDValue NVPTXTargetLowering::LowerINT_TO_FP(SDValue Op,
return Op;
}
-SDValue NVPTXTargetLowering::LowerFP_TO_INT(SDValue Op,
- SelectionDAG &DAG) const {
- assert(STI.getSmVersion() < 90 || STI.getPTXVersion() < 78);
-
- if (Op.getOperand(0).getValueType() == MVT::bf16) {
- SDLoc Loc(Op);
- return DAG.getNode(
- Op.getOpcode(), Loc, Op.getValueType(),
- DAG.getNode(ISD::FP_EXTEND, Loc, MVT::f32, Op.getOperand(0)));
- }
-
- // Everything else is considered legal.
- return Op;
-}
-
SDValue NVPTXTargetLowering::LowerFP_ROUND(SDValue Op,
SelectionDAG &DAG) const {
EVT NarrowVT = Op.getValueType();
@@ -2842,9 +2827,6 @@ NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
return LowerINT_TO_FP(Op, DAG);
- case ISD::FP_TO_SINT:
- case ISD::FP_TO_UINT:
- return LowerFP_TO_INT(Op, DAG);
case ISD::FP_ROUND:
return LowerFP_ROUND(Op, DAG);
case ISD::FP_EXTEND:
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
index 70e16eee346aa2..5d666fc60a025c 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -631,7 +631,6 @@ class NVPTXTargetLowering : public TargetLowering {
SDValue LowerFROUND64(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
More information about the llvm-commits mailing list