[llvm] [NVPTX] Cleanup ISel code after float register removal, use BasicNVPTXInst (PR #141711)

Alex MacLean via llvm-commits llvm-commits at lists.llvm.org
Thu May 29 07:46:12 PDT 2025


================
@@ -2532,295 +2468,186 @@ let mayStore=1, hasSideEffects=0 in {
   defm STV_i16 : ST_VEC<Int16Regs>;
   defm STV_i32 : ST_VEC<Int32Regs, support_v8 = true>;
   defm STV_i64 : ST_VEC<Int64Regs>;
-  defm STV_f32 : ST_VEC<Float32Regs, support_v8 = true>;
-  defm STV_f64 : ST_VEC<Float64Regs>;
 }
 
 //---- Conversion ----
 
-class F_BITCONVERT<string SzStr, ValueType TIn, ValueType TOut,
-  NVPTXRegClass regclassIn = ValueToRegClass<TIn>.ret,
-  NVPTXRegClass regclassOut = ValueToRegClass<TOut>.ret> :
-           NVPTXInst<(outs regclassOut:$d), (ins regclassIn:$a),
-           !strconcat("mov.b", SzStr, " \t$d, $a;"),
-     [(set TOut:$d, (bitconvert TIn:$a))]>;
-
-def BITCONVERT_32_I2F : F_BITCONVERT<"32", i32, f32>;
-def BITCONVERT_32_F2I : F_BITCONVERT<"32", f32, i32>;
-def BITCONVERT_64_I2F : F_BITCONVERT<"64", i64, f64>;
-def BITCONVERT_64_F2I : F_BITCONVERT<"64", f64, i64>;
-
-foreach vt = [v2f16, v2bf16, v2i16, v4i8] in {
-def: Pat<(vt (bitconvert (f32 Float32Regs:$a))),
-         (BITCONVERT_32_F2I $a)>;
-def: Pat<(f32 (bitconvert vt:$a)),
-         (BITCONVERT_32_I2F $a)>;
-}
-foreach vt = [f16, bf16] in {
-  def: Pat<(vt (bitconvert i16:$a)),
-           (vt Int16Regs:$a)>;
-  def: Pat<(i16 (bitconvert vt:$a)),
-           (i16 Int16Regs:$a)>;
-}
-
-foreach ta = [v2f16, v2bf16, v2i16, v4i8, i32] in {
-  foreach tb = [v2f16, v2bf16, v2i16, v4i8, i32] in {
-    if !ne(ta, tb) then {
-      def: Pat<(ta (bitconvert tb:$a)),
-               (ta Int32Regs:$a)>;
-    }
-  }
-}
+foreach rc = [Int16Regs, Int32Regs, Int64Regs] in
+  foreach ta = rc.RegTypes in
+    foreach tb = rc.RegTypes in
+      if !ne(ta, tb) then
+        def : Pat<(ta (bitconvert tb:$a)),
+                  (ta rc:$a)>;
 
 // NOTE: pred->fp are currently sub-optimal due to an issue in TableGen where
 // we cannot specify floating-point literals in isel patterns.  Therefore, we
 // use an integer selp to select either 1 (or -1 in case of signed) or 0
 // and then cvt to floating-point.
 
 // sint -> f16
-def : Pat<(f16 (sint_to_fp i1:$a)),
-          (CVT_f16_s32 (SELP_b32ii -1, 0, $a), CvtRN)>;
-def : Pat<(f16 (sint_to_fp Int16Regs:$a)),
-          (CVT_f16_s16 $a, CvtRN)>;
-def : Pat<(f16 (sint_to_fp i32:$a)),
-          (CVT_f16_s32 $a, CvtRN)>;
-def : Pat<(f16 (sint_to_fp i64:$a)),
-          (CVT_f16_s64 $a, CvtRN)>;
+def : Pat<(f16 (sint_to_fp  i1:$a)), (CVT_f16_s32 (SELP_b32ii -1, 0, $a), CvtRN)>;
+def : Pat<(f16 (sint_to_fp i16:$a)), (CVT_f16_s16 $a, CvtRN)>;
+def : Pat<(f16 (sint_to_fp i32:$a)), (CVT_f16_s32 $a, CvtRN)>;
+def : Pat<(f16 (sint_to_fp i64:$a)), (CVT_f16_s64 $a, CvtRN)>;
 
 // uint -> f16
-def : Pat<(f16 (uint_to_fp i1:$a)),
-          (CVT_f16_u32 (SELP_b32ii 1, 0, $a), CvtRN)>;
-def : Pat<(f16 (uint_to_fp Int16Regs:$a)),
-          (CVT_f16_u16 $a, CvtRN)>;
-def : Pat<(f16 (uint_to_fp i32:$a)),
-          (CVT_f16_u32 $a, CvtRN)>;
-def : Pat<(f16 (uint_to_fp i64:$a)),
-          (CVT_f16_u64 $a, CvtRN)>;
+def : Pat<(f16 (uint_to_fp  i1:$a)), (CVT_f16_u32 (SELP_b32ii 1, 0, $a), CvtRN)>;
+def : Pat<(f16 (uint_to_fp i16:$a)), (CVT_f16_u16 $a, CvtRN)>;
+def : Pat<(f16 (uint_to_fp i32:$a)), (CVT_f16_u32 $a, CvtRN)>;
+def : Pat<(f16 (uint_to_fp i64:$a)), (CVT_f16_u64 $a, CvtRN)>;
 
 // sint -> bf16
-def : Pat<(bf16 (sint_to_fp i1:$a)),
-          (CVT_bf16_s32 (SELP_b32ii 1, 0, $a), CvtRN)>, Requires<[hasPTX<78>, hasSM<90>]>;
-def : Pat<(bf16 (sint_to_fp i16:$a)),
-          (CVT_bf16_s16 $a, CvtRN)>, Requires<[hasPTX<78>, hasSM<90>]>;
-def : Pat<(bf16 (sint_to_fp i32:$a)),
-          (CVT_bf16_s32 $a, CvtRN)>, Requires<[hasPTX<78>, hasSM<90>]>;
-def : Pat<(bf16 (sint_to_fp i64:$a)),
-          (CVT_bf16_s64 $a, CvtRN)>, Requires<[hasPTX<78>, hasSM<90>]>;
+def : Pat<(bf16 (sint_to_fp i1:$a)), (CVT_bf16_s32 (SELP_b32ii 1, 0, $a), CvtRN)>, 
+      Requires<[hasPTX<78>, hasSM<90>]>;
+def : Pat<(bf16 (sint_to_fp i16:$a)), (CVT_bf16_s16 $a, CvtRN)>,
+      Requires<[hasPTX<78>, hasSM<90>]>;
+def : Pat<(bf16 (sint_to_fp i32:$a)), (CVT_bf16_s32 $a, CvtRN)>,
+      Requires<[hasPTX<78>, hasSM<90>]>;
+def : Pat<(bf16 (sint_to_fp i64:$a)), (CVT_bf16_s64 $a, CvtRN)>,
+      Requires<[hasPTX<78>, hasSM<90>]>;
----------------
AlexMaclean wrote:

This is a good idea. However, I think this change is already big enough that I'd prefer to leave it as a follow-up for a subsequent cleanup.

https://github.com/llvm/llvm-project/pull/141711


More information about the llvm-commits mailing list