[llvm] [NVPTX] Draft aidan (PR #119086)

Justin Fargnoli via llvm-commits llvm-commits at lists.llvm.org
Sat Dec 7 10:35:35 PST 2024


https://github.com/justinfargnoli updated https://github.com/llvm/llvm-project/pull/119086

From 380f0bdb2779f2c73b05c74dc4953d284ae5e8d9 Mon Sep 17 00:00:00 2001
From: justinfargnoli <justinfargnoli at gmail.com>
Date: Sat, 7 Dec 2024 10:21:40 -0800
Subject: [PATCH 1/2] [NVPTX] Draft aidan

---
 llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp  |  10 +-
 llvm/test/CodeGen/NVPTX/bf16-instructions.ll | 130 ++++---------------
 2 files changed, 26 insertions(+), 114 deletions(-)

diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 8812136733fb24..4c50a2e677d3f2 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -809,13 +809,11 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
   // bf16 conversions.
   if (STI.getSmVersion() < 90 || STI.getPTXVersion() < 78) {
     for (MVT VT : {MVT::i1, MVT::i16, MVT::i32, MVT::i64}) {
-      setOperationAction(
-          {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT},
-          VT, Custom);
+      setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Custom);
     }
-    setOperationAction(
-        {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT},
-        MVT::bf16, Custom);
+    setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, MVT::bf16, Promote);
+    AddPromotedToType(ISD::FP_TO_SINT, MVT::bf16, MVT::f32);
+    AddPromotedToType(ISD::FP_TO_UINT, MVT::bf16, MVT::f32);
   }
 
   setOperationAction(ISD::FROUND, MVT::f16, Promote);
diff --git a/llvm/test/CodeGen/NVPTX/bf16-instructions.ll b/llvm/test/CodeGen/NVPTX/bf16-instructions.ll
index 95bca39c73ad73..03315f09e822ac 100644
--- a/llvm/test/CodeGen/NVPTX/bf16-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/bf16-instructions.ll
@@ -938,119 +938,33 @@ define <8 x float> @test_extload_bf16x8(ptr addrspace(3) noundef %arg) #0 {
 }
 
 define i16 @test_fptosi_i16(bfloat %a) {
-; SM70-LABEL: test_fptosi_i16(
-; SM70:       {
-; SM70-NEXT:    .reg .b16 %rs<2>;
-; SM70-NEXT:    .reg .b32 %r<4>;
-; SM70-NEXT:    .reg .f32 %f<2>;
-; SM70-EMPTY:
-; SM70-NEXT:  // %bb.0:
-; SM70-NEXT:    ld.param.u16 %r1, [test_fptosi_i16_param_0];
-; SM70-NEXT:    shl.b32 %r2, %r1, 16;
-; SM70-NEXT:    mov.b32 %f1, %r2;
-; SM70-NEXT:    cvt.rzi.s16.f32 %rs1, %f1;
-; SM70-NEXT:    cvt.u32.u16 %r3, %rs1;
-; SM70-NEXT:    st.param.b32 [func_retval0+0], %r3;
-; SM70-NEXT:    ret;
-;
-; SM80-LABEL: test_fptosi_i16(
-; SM80:       {
-; SM80-NEXT:    .reg .b16 %rs<3>;
-; SM80-NEXT:    .reg .b32 %r<2>;
-; SM80-NEXT:    .reg .f32 %f<2>;
-; SM80-EMPTY:
-; SM80-NEXT:  // %bb.0:
-; SM80-NEXT:    ld.param.b16 %rs1, [test_fptosi_i16_param_0];
-; SM80-NEXT:    cvt.f32.bf16 %f1, %rs1;
-; SM80-NEXT:    cvt.rzi.s16.f32 %rs2, %f1;
-; SM80-NEXT:    cvt.u32.u16 %r1, %rs2;
-; SM80-NEXT:    st.param.b32 [func_retval0+0], %r1;
-; SM80-NEXT:    ret;
-;
-; SM80-FTZ-LABEL: test_fptosi_i16(
-; SM80-FTZ:       {
-; SM80-FTZ-NEXT:    .reg .b16 %rs<3>;
-; SM80-FTZ-NEXT:    .reg .b32 %r<2>;
-; SM80-FTZ-NEXT:    .reg .f32 %f<2>;
-; SM80-FTZ-EMPTY:
-; SM80-FTZ-NEXT:  // %bb.0:
-; SM80-FTZ-NEXT:    ld.param.b16 %rs1, [test_fptosi_i16_param_0];
-; SM80-FTZ-NEXT:    cvt.ftz.f32.bf16 %f1, %rs1;
-; SM80-FTZ-NEXT:    cvt.rzi.ftz.s16.f32 %rs2, %f1;
-; SM80-FTZ-NEXT:    cvt.u32.u16 %r1, %rs2;
-; SM80-FTZ-NEXT:    st.param.b32 [func_retval0+0], %r1;
-; SM80-FTZ-NEXT:    ret;
-;
-; SM90-LABEL: test_fptosi_i16(
-; SM90:       {
-; SM90-NEXT:    .reg .b16 %rs<3>;
-; SM90-NEXT:    .reg .b32 %r<2>;
-; SM90-EMPTY:
-; SM90-NEXT:  // %bb.0:
-; SM90-NEXT:    ld.param.b16 %rs1, [test_fptosi_i16_param_0];
-; SM90-NEXT:    cvt.rzi.s16.bf16 %rs2, %rs1;
-; SM90-NEXT:    cvt.u32.u16 %r1, %rs2;
-; SM90-NEXT:    st.param.b32 [func_retval0+0], %r1;
-; SM90-NEXT:    ret;
+; CHECK-LABEL: test_fptosi_i16(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<3>;
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b16 %rs1, [test_fptosi_i16_param_0];
+; CHECK-NEXT:    cvt.rzi.s16.bf16 %rs2, %rs1;
+; CHECK-NEXT:    cvt.u32.u16 %r1, %rs2;
+; CHECK-NEXT:    st.param.b32 [func_retval0+0], %r1;
+; CHECK-NEXT:    ret;
   %r = fptosi bfloat %a to i16
   ret i16 %r
 }
 
 define i16 @test_fptoui_i16(bfloat %a) {
-; SM70-LABEL: test_fptoui_i16(
-; SM70:       {
-; SM70-NEXT:    .reg .b16 %rs<2>;
-; SM70-NEXT:    .reg .b32 %r<4>;
-; SM70-NEXT:    .reg .f32 %f<2>;
-; SM70-EMPTY:
-; SM70-NEXT:  // %bb.0:
-; SM70-NEXT:    ld.param.u16 %r1, [test_fptoui_i16_param_0];
-; SM70-NEXT:    shl.b32 %r2, %r1, 16;
-; SM70-NEXT:    mov.b32 %f1, %r2;
-; SM70-NEXT:    cvt.rzi.u16.f32 %rs1, %f1;
-; SM70-NEXT:    cvt.u32.u16 %r3, %rs1;
-; SM70-NEXT:    st.param.b32 [func_retval0+0], %r3;
-; SM70-NEXT:    ret;
-;
-; SM80-LABEL: test_fptoui_i16(
-; SM80:       {
-; SM80-NEXT:    .reg .b16 %rs<3>;
-; SM80-NEXT:    .reg .b32 %r<2>;
-; SM80-NEXT:    .reg .f32 %f<2>;
-; SM80-EMPTY:
-; SM80-NEXT:  // %bb.0:
-; SM80-NEXT:    ld.param.b16 %rs1, [test_fptoui_i16_param_0];
-; SM80-NEXT:    cvt.f32.bf16 %f1, %rs1;
-; SM80-NEXT:    cvt.rzi.u16.f32 %rs2, %f1;
-; SM80-NEXT:    cvt.u32.u16 %r1, %rs2;
-; SM80-NEXT:    st.param.b32 [func_retval0+0], %r1;
-; SM80-NEXT:    ret;
-;
-; SM80-FTZ-LABEL: test_fptoui_i16(
-; SM80-FTZ:       {
-; SM80-FTZ-NEXT:    .reg .b16 %rs<3>;
-; SM80-FTZ-NEXT:    .reg .b32 %r<2>;
-; SM80-FTZ-NEXT:    .reg .f32 %f<2>;
-; SM80-FTZ-EMPTY:
-; SM80-FTZ-NEXT:  // %bb.0:
-; SM80-FTZ-NEXT:    ld.param.b16 %rs1, [test_fptoui_i16_param_0];
-; SM80-FTZ-NEXT:    cvt.ftz.f32.bf16 %f1, %rs1;
-; SM80-FTZ-NEXT:    cvt.rzi.ftz.u16.f32 %rs2, %f1;
-; SM80-FTZ-NEXT:    cvt.u32.u16 %r1, %rs2;
-; SM80-FTZ-NEXT:    st.param.b32 [func_retval0+0], %r1;
-; SM80-FTZ-NEXT:    ret;
-;
-; SM90-LABEL: test_fptoui_i16(
-; SM90:       {
-; SM90-NEXT:    .reg .b16 %rs<3>;
-; SM90-NEXT:    .reg .b32 %r<2>;
-; SM90-EMPTY:
-; SM90-NEXT:  // %bb.0:
-; SM90-NEXT:    ld.param.b16 %rs1, [test_fptoui_i16_param_0];
-; SM90-NEXT:    cvt.rzi.u16.bf16 %rs2, %rs1;
-; SM90-NEXT:    cvt.u32.u16 %r1, %rs2;
-; SM90-NEXT:    st.param.b32 [func_retval0+0], %r1;
-; SM90-NEXT:    ret;
+; CHECK-LABEL: test_fptoui_i16(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<3>;
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b16 %rs1, [test_fptoui_i16_param_0];
+; CHECK-NEXT:    cvt.rzi.u16.bf16 %rs2, %rs1;
+; CHECK-NEXT:    cvt.u32.u16 %r1, %rs2;
+; CHECK-NEXT:    st.param.b32 [func_retval0+0], %r1;
+; CHECK-NEXT:    ret;
   %r = fptoui bfloat %a to i16
   ret i16 %r
 }

From fef30011dfc05646181a8d8069cc61d98c1ce657 Mon Sep 17 00:00:00 2001
From: justinfargnoli <justinfargnoli at gmail.com>
Date: Sat, 7 Dec 2024 10:35:25 -0800
Subject: [PATCH 2/2] Delete code

---
 llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 18 ------------------
 llvm/lib/Target/NVPTX/NVPTXISelLowering.h   |  1 -
 2 files changed, 19 deletions(-)

diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 4c50a2e677d3f2..5d84611505f6ba 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -2702,21 +2702,6 @@ SDValue NVPTXTargetLowering::LowerINT_TO_FP(SDValue Op,
   return Op;
 }
 
-SDValue NVPTXTargetLowering::LowerFP_TO_INT(SDValue Op,
-                                            SelectionDAG &DAG) const {
-  assert(STI.getSmVersion() < 90 || STI.getPTXVersion() < 78);
-
-  if (Op.getOperand(0).getValueType() == MVT::bf16) {
-    SDLoc Loc(Op);
-    return DAG.getNode(
-        Op.getOpcode(), Loc, Op.getValueType(),
-        DAG.getNode(ISD::FP_EXTEND, Loc, MVT::f32, Op.getOperand(0)));
-  }
-
-  // Everything else is considered legal.
-  return Op;
-}
-
 SDValue NVPTXTargetLowering::LowerFP_ROUND(SDValue Op,
                                            SelectionDAG &DAG) const {
   EVT NarrowVT = Op.getValueType();
@@ -2842,9 +2827,6 @@ NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::SINT_TO_FP:
   case ISD::UINT_TO_FP:
     return LowerINT_TO_FP(Op, DAG);
-  case ISD::FP_TO_SINT:
-  case ISD::FP_TO_UINT:
-    return LowerFP_TO_INT(Op, DAG);
   case ISD::FP_ROUND:
     return LowerFP_ROUND(Op, DAG);
   case ISD::FP_EXTEND:
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
index 70e16eee346aa2..5d666fc60a025c 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -631,7 +631,6 @@ class NVPTXTargetLowering : public TargetLowering {
   SDValue LowerFROUND64(SDValue Op, SelectionDAG &DAG) const;
 
   SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
-  SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
 
   SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;



More information about the llvm-commits mailing list