[llvm] [NVPTX] Cleanup ISel code after float register removal, use BasicNVPTXInst (PR #141711)
Artem Belevich via llvm-commits
llvm-commits at lists.llvm.org
Wed May 28 10:33:26 PDT 2025
================
@@ -2532,295 +2468,186 @@ let mayStore=1, hasSideEffects=0 in {
defm STV_i16 : ST_VEC<Int16Regs>;
defm STV_i32 : ST_VEC<Int32Regs, support_v8 = true>;
defm STV_i64 : ST_VEC<Int64Regs>;
- defm STV_f32 : ST_VEC<Float32Regs, support_v8 = true>;
- defm STV_f64 : ST_VEC<Float64Regs>;
}
//---- Conversion ----
-class F_BITCONVERT<string SzStr, ValueType TIn, ValueType TOut,
- NVPTXRegClass regclassIn = ValueToRegClass<TIn>.ret,
- NVPTXRegClass regclassOut = ValueToRegClass<TOut>.ret> :
- NVPTXInst<(outs regclassOut:$d), (ins regclassIn:$a),
- !strconcat("mov.b", SzStr, " \t$d, $a;"),
- [(set TOut:$d, (bitconvert TIn:$a))]>;
-
-def BITCONVERT_32_I2F : F_BITCONVERT<"32", i32, f32>;
-def BITCONVERT_32_F2I : F_BITCONVERT<"32", f32, i32>;
-def BITCONVERT_64_I2F : F_BITCONVERT<"64", i64, f64>;
-def BITCONVERT_64_F2I : F_BITCONVERT<"64", f64, i64>;
-
-foreach vt = [v2f16, v2bf16, v2i16, v4i8] in {
-def: Pat<(vt (bitconvert (f32 Float32Regs:$a))),
- (BITCONVERT_32_F2I $a)>;
-def: Pat<(f32 (bitconvert vt:$a)),
- (BITCONVERT_32_I2F $a)>;
-}
-foreach vt = [f16, bf16] in {
- def: Pat<(vt (bitconvert i16:$a)),
- (vt Int16Regs:$a)>;
- def: Pat<(i16 (bitconvert vt:$a)),
- (i16 Int16Regs:$a)>;
-}
-
-foreach ta = [v2f16, v2bf16, v2i16, v4i8, i32] in {
- foreach tb = [v2f16, v2bf16, v2i16, v4i8, i32] in {
- if !ne(ta, tb) then {
- def: Pat<(ta (bitconvert tb:$a)),
- (ta Int32Regs:$a)>;
- }
- }
-}
+foreach rc = [Int16Regs, Int32Regs, Int64Regs] in
+ foreach ta = rc.RegTypes in
+ foreach tb = rc.RegTypes in
+ if !ne(ta, tb) then
+ def : Pat<(ta (bitconvert tb:$a)),
+ (ta rc:$a)>;
// NOTE: pred->fp are currently sub-optimal due to an issue in TableGen where
// we cannot specify floating-point literals in isel patterns. Therefore, we
// use an integer selp to select either 1 (or -1 in case of signed) or 0
// and then cvt to floating-point.
// sint -> f16
-def : Pat<(f16 (sint_to_fp i1:$a)),
- (CVT_f16_s32 (SELP_b32ii -1, 0, $a), CvtRN)>;
-def : Pat<(f16 (sint_to_fp Int16Regs:$a)),
- (CVT_f16_s16 $a, CvtRN)>;
-def : Pat<(f16 (sint_to_fp i32:$a)),
- (CVT_f16_s32 $a, CvtRN)>;
-def : Pat<(f16 (sint_to_fp i64:$a)),
- (CVT_f16_s64 $a, CvtRN)>;
+def : Pat<(f16 (sint_to_fp i1:$a)), (CVT_f16_s32 (SELP_b32ii -1, 0, $a), CvtRN)>;
+def : Pat<(f16 (sint_to_fp i16:$a)), (CVT_f16_s16 $a, CvtRN)>;
+def : Pat<(f16 (sint_to_fp i32:$a)), (CVT_f16_s32 $a, CvtRN)>;
+def : Pat<(f16 (sint_to_fp i64:$a)), (CVT_f16_s64 $a, CvtRN)>;
// uint -> f16
-def : Pat<(f16 (uint_to_fp i1:$a)),
- (CVT_f16_u32 (SELP_b32ii 1, 0, $a), CvtRN)>;
-def : Pat<(f16 (uint_to_fp Int16Regs:$a)),
- (CVT_f16_u16 $a, CvtRN)>;
-def : Pat<(f16 (uint_to_fp i32:$a)),
- (CVT_f16_u32 $a, CvtRN)>;
-def : Pat<(f16 (uint_to_fp i64:$a)),
- (CVT_f16_u64 $a, CvtRN)>;
+def : Pat<(f16 (uint_to_fp i1:$a)), (CVT_f16_u32 (SELP_b32ii 1, 0, $a), CvtRN)>;
+def : Pat<(f16 (uint_to_fp i16:$a)), (CVT_f16_u16 $a, CvtRN)>;
+def : Pat<(f16 (uint_to_fp i32:$a)), (CVT_f16_u32 $a, CvtRN)>;
+def : Pat<(f16 (uint_to_fp i64:$a)), (CVT_f16_u64 $a, CvtRN)>;
// sint -> bf16
-def : Pat<(bf16 (sint_to_fp i1:$a)),
- (CVT_bf16_s32 (SELP_b32ii 1, 0, $a), CvtRN)>, Requires<[hasPTX<78>, hasSM<90>]>;
-def : Pat<(bf16 (sint_to_fp i16:$a)),
- (CVT_bf16_s16 $a, CvtRN)>, Requires<[hasPTX<78>, hasSM<90>]>;
-def : Pat<(bf16 (sint_to_fp i32:$a)),
- (CVT_bf16_s32 $a, CvtRN)>, Requires<[hasPTX<78>, hasSM<90>]>;
-def : Pat<(bf16 (sint_to_fp i64:$a)),
- (CVT_bf16_s64 $a, CvtRN)>, Requires<[hasPTX<78>, hasSM<90>]>;
+def : Pat<(bf16 (sint_to_fp i1:$a)), (CVT_bf16_s32 (SELP_b32ii 1, 0, $a), CvtRN)>,
+ Requires<[hasPTX<78>, hasSM<90>]>;
+def : Pat<(bf16 (sint_to_fp i16:$a)), (CVT_bf16_s16 $a, CvtRN)>,
+ Requires<[hasPTX<78>, hasSM<90>]>;
+def : Pat<(bf16 (sint_to_fp i32:$a)), (CVT_bf16_s32 $a, CvtRN)>,
+ Requires<[hasPTX<78>, hasSM<90>]>;
+def : Pat<(bf16 (sint_to_fp i64:$a)), (CVT_bf16_s64 $a, CvtRN)>,
+ Requires<[hasPTX<78>, hasSM<90>]>;
----------------
Artem-B wrote:
I wonder if there's a way to move the common predicates into an enclosing
`let Predicates = [...] in { ... }` block.
That would make small groups of patterns that share the same predicates more concise.
https://github.com/llvm/llvm-project/pull/141711
More information about the llvm-commits
mailing list