[llvm] [NVPTX] Cleanup ISel code after float register removal, use BasicNVPTXInst (PR #141711)
via llvm-commits
llvm-commits at lists.llvm.org
Tue May 27 19:46:10 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-nvptx
Author: Alex MacLean (AlexMaclean)
Patch is 398.02 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/141711.diff
19 Files Affected:
- (modified) llvm/lib/Target/NVPTX/NVPTXForwardParams.cpp (-6)
- (modified) llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp (+46-72)
- (modified) llvm/lib/Target/NVPTX/NVPTXInstrInfo.td (+529-728)
- (modified) llvm/lib/Target/NVPTX/NVPTXIntrinsics.td (+453-656)
- (modified) llvm/lib/Target/NVPTX/NVPTXProxyRegErasure.cpp (+4-6)
- (modified) llvm/test/CodeGen/NVPTX/bf16-instructions.ll (+299-299)
- (modified) llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll (+25-25)
- (modified) llvm/test/CodeGen/NVPTX/f16-instructions.ll (+2-4)
- (modified) llvm/test/CodeGen/NVPTX/f16x2-instructions.ll (+49-59)
- (modified) llvm/test/CodeGen/NVPTX/fexp2.ll (+60-60)
- (modified) llvm/test/CodeGen/NVPTX/flog2.ll (+40-40)
- (modified) llvm/test/CodeGen/NVPTX/fma-relu-contract.ll (+298-298)
- (modified) llvm/test/CodeGen/NVPTX/fma-relu-fma-intrinsic.ll (+230-230)
- (modified) llvm/test/CodeGen/NVPTX/fma-relu-instruction-flag.ll (+478-478)
- (modified) llvm/test/CodeGen/NVPTX/i8x4-instructions.ll (+4-6)
- (modified) llvm/test/CodeGen/NVPTX/inline-asm.ll (+3-3)
- (modified) llvm/test/CodeGen/NVPTX/math-intrins.ll (+124-124)
- (modified) llvm/test/CodeGen/NVPTX/param-add.ll (+11-11)
- (modified) llvm/test/CodeGen/NVPTX/proxy-reg-erasure.mir (+7-7)
``````````diff
diff --git a/llvm/lib/Target/NVPTX/NVPTXForwardParams.cpp b/llvm/lib/Target/NVPTX/NVPTXForwardParams.cpp
index 008209785a683..cd404819cb837 100644
--- a/llvm/lib/Target/NVPTX/NVPTXForwardParams.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXForwardParams.cpp
@@ -53,16 +53,10 @@ static bool traverseMoveUse(MachineInstr &U, const MachineRegisterInfo &MRI,
SmallVectorImpl<MachineInstr *> &RemoveList,
SmallVectorImpl<MachineInstr *> &LoadInsts) {
switch (U.getOpcode()) {
- case NVPTX::LD_f32:
- case NVPTX::LD_f64:
case NVPTX::LD_i16:
case NVPTX::LD_i32:
case NVPTX::LD_i64:
case NVPTX::LD_i8:
- case NVPTX::LDV_f32_v2:
- case NVPTX::LDV_f32_v4:
- case NVPTX::LDV_f64_v2:
- case NVPTX::LDV_f64_v4:
case NVPTX::LDV_i16_v2:
case NVPTX::LDV_i16_v4:
case NVPTX::LDV_i32_v2:
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index b05a4713e6340..b1f653f9c3aed 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -1015,33 +1015,29 @@ void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
// Helper function template to reduce amount of boilerplate code for
// opcode selection.
-static std::optional<unsigned> pickOpcodeForVT(
- MVT::SimpleValueType VT, std::optional<unsigned> Opcode_i8,
- std::optional<unsigned> Opcode_i16, std::optional<unsigned> Opcode_i32,
- std::optional<unsigned> Opcode_i64, std::optional<unsigned> Opcode_f32,
- std::optional<unsigned> Opcode_f64) {
+static std::optional<unsigned>
+pickOpcodeForVT(MVT::SimpleValueType VT, std::optional<unsigned> Opcode_i8,
+ std::optional<unsigned> Opcode_i16,
+ std::optional<unsigned> Opcode_i32,
+ std::optional<unsigned> Opcode_i64) {
switch (VT) {
case MVT::i1:
case MVT::i8:
return Opcode_i8;
- case MVT::i16:
- return Opcode_i16;
- case MVT::i32:
- return Opcode_i32;
- case MVT::i64:
- return Opcode_i64;
case MVT::f16:
+ case MVT::i16:
case MVT::bf16:
return Opcode_i16;
case MVT::v2f16:
case MVT::v2bf16:
case MVT::v2i16:
case MVT::v4i8:
- return Opcode_i32;
+ case MVT::i32:
case MVT::f32:
- return Opcode_f32;
+ return Opcode_i32;
+ case MVT::i64:
case MVT::f64:
- return Opcode_f64;
+ return Opcode_i64;
default:
return std::nullopt;
}
@@ -1101,9 +1097,8 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
Chain};
const MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;
- const std::optional<unsigned> Opcode =
- pickOpcodeForVT(TargetVT, NVPTX::LD_i8, NVPTX::LD_i16, NVPTX::LD_i32,
- NVPTX::LD_i64, NVPTX::LD_f32, NVPTX::LD_f64);
+ const std::optional<unsigned> Opcode = pickOpcodeForVT(
+ TargetVT, NVPTX::LD_i8, NVPTX::LD_i16, NVPTX::LD_i32, NVPTX::LD_i64);
if (!Opcode)
return false;
@@ -1203,22 +1198,19 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
default:
return false;
case NVPTXISD::LoadV2:
- Opcode =
- pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2,
- NVPTX::LDV_i16_v2, NVPTX::LDV_i32_v2, NVPTX::LDV_i64_v2,
- NVPTX::LDV_f32_v2, NVPTX::LDV_f64_v2);
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2,
+ NVPTX::LDV_i16_v2, NVPTX::LDV_i32_v2,
+ NVPTX::LDV_i64_v2);
break;
case NVPTXISD::LoadV4:
- Opcode =
- pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4,
- NVPTX::LDV_i16_v4, NVPTX::LDV_i32_v4, NVPTX::LDV_i64_v4,
- NVPTX::LDV_f32_v4, NVPTX::LDV_f64_v4);
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4,
+ NVPTX::LDV_i16_v4, NVPTX::LDV_i32_v4,
+ NVPTX::LDV_i64_v4);
break;
case NVPTXISD::LoadV8:
Opcode =
pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, {/* no v8i8 */},
- {/* no v8i16 */}, NVPTX::LDV_i32_v8, {/* no v8i64 */},
- NVPTX::LDV_f32_v8, {/* no v8f64 */});
+ {/* no v8i16 */}, NVPTX::LDV_i32_v8, {/* no v8i64 */});
break;
}
if (!Opcode)
@@ -1286,48 +1278,42 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
Opcode = pickOpcodeForVT(
EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_GLOBAL_i8,
NVPTX::INT_PTX_LDG_GLOBAL_i16, NVPTX::INT_PTX_LDG_GLOBAL_i32,
- NVPTX::INT_PTX_LDG_GLOBAL_i64, NVPTX::INT_PTX_LDG_GLOBAL_f32,
- NVPTX::INT_PTX_LDG_GLOBAL_f64);
+ NVPTX::INT_PTX_LDG_GLOBAL_i64);
break;
case ISD::INTRINSIC_W_CHAIN:
Opcode = pickOpcodeForVT(
EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_GLOBAL_i8,
NVPTX::INT_PTX_LDU_GLOBAL_i16, NVPTX::INT_PTX_LDU_GLOBAL_i32,
- NVPTX::INT_PTX_LDU_GLOBAL_i64, NVPTX::INT_PTX_LDU_GLOBAL_f32,
- NVPTX::INT_PTX_LDU_GLOBAL_f64);
+ NVPTX::INT_PTX_LDU_GLOBAL_i64);
break;
case NVPTXISD::LoadV2:
Opcode = pickOpcodeForVT(
EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v2i8_ELE,
NVPTX::INT_PTX_LDG_G_v2i16_ELE, NVPTX::INT_PTX_LDG_G_v2i32_ELE,
- NVPTX::INT_PTX_LDG_G_v2i64_ELE, NVPTX::INT_PTX_LDG_G_v2f32_ELE,
- NVPTX::INT_PTX_LDG_G_v2f64_ELE);
+ NVPTX::INT_PTX_LDG_G_v2i64_ELE);
break;
case NVPTXISD::LDUV2:
Opcode = pickOpcodeForVT(
EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v2i8_ELE,
NVPTX::INT_PTX_LDU_G_v2i16_ELE, NVPTX::INT_PTX_LDU_G_v2i32_ELE,
- NVPTX::INT_PTX_LDU_G_v2i64_ELE, NVPTX::INT_PTX_LDU_G_v2f32_ELE,
- NVPTX::INT_PTX_LDU_G_v2f64_ELE);
+ NVPTX::INT_PTX_LDU_G_v2i64_ELE);
break;
case NVPTXISD::LoadV4:
Opcode = pickOpcodeForVT(
EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE,
NVPTX::INT_PTX_LDG_G_v4i16_ELE, NVPTX::INT_PTX_LDG_G_v4i32_ELE,
- NVPTX::INT_PTX_LDG_G_v4i64_ELE, NVPTX::INT_PTX_LDG_G_v4f32_ELE,
- NVPTX::INT_PTX_LDG_G_v4f64_ELE);
+ NVPTX::INT_PTX_LDG_G_v4i64_ELE);
break;
case NVPTXISD::LDUV4:
- Opcode = pickOpcodeForVT(
- EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE,
- NVPTX::INT_PTX_LDU_G_v4i16_ELE, NVPTX::INT_PTX_LDU_G_v4i32_ELE,
- {/* no v4i64 */}, NVPTX::INT_PTX_LDU_G_v4f32_ELE, {/* no v4f64 */});
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::INT_PTX_LDU_G_v4i8_ELE,
+ NVPTX::INT_PTX_LDU_G_v4i16_ELE,
+ NVPTX::INT_PTX_LDU_G_v4i32_ELE, {/* no v4i64 */});
break;
case NVPTXISD::LoadV8:
Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, {/* no v8i8 */},
{/* no v8i16 */}, NVPTX::INT_PTX_LDG_G_v8i32_ELE,
- {/* no v8i64 */}, NVPTX::INT_PTX_LDG_G_v8f32_ELE,
- {/* no v8f64 */});
+ {/* no v8i64 */});
break;
}
if (!Opcode)
@@ -1421,9 +1407,8 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
const MVT::SimpleValueType SourceVT =
Value.getNode()->getSimpleValueType(0).SimpleTy;
- const std::optional<unsigned> Opcode =
- pickOpcodeForVT(SourceVT, NVPTX::ST_i8, NVPTX::ST_i16, NVPTX::ST_i32,
- NVPTX::ST_i64, NVPTX::ST_f32, NVPTX::ST_f64);
+ const std::optional<unsigned> Opcode = pickOpcodeForVT(
+ SourceVT, NVPTX::ST_i8, NVPTX::ST_i16, NVPTX::ST_i32, NVPTX::ST_i64);
if (!Opcode)
return false;
@@ -1486,22 +1471,19 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
default:
return false;
case NVPTXISD::StoreV2:
- Opcode =
- pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2,
- NVPTX::STV_i16_v2, NVPTX::STV_i32_v2, NVPTX::STV_i64_v2,
- NVPTX::STV_f32_v2, NVPTX::STV_f64_v2);
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2,
+ NVPTX::STV_i16_v2, NVPTX::STV_i32_v2,
+ NVPTX::STV_i64_v2);
break;
case NVPTXISD::StoreV4:
- Opcode =
- pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4,
- NVPTX::STV_i16_v4, NVPTX::STV_i32_v4, NVPTX::STV_i64_v4,
- NVPTX::STV_f32_v4, NVPTX::STV_f64_v4);
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4,
+ NVPTX::STV_i16_v4, NVPTX::STV_i32_v4,
+ NVPTX::STV_i64_v4);
break;
case NVPTXISD::StoreV8:
Opcode =
pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, {/* no v8i8 */},
- {/* no v8i16 */}, NVPTX::STV_i32_v8, {/* no v8i64 */},
- NVPTX::STV_f32_v8, {/* no v8f64 */});
+ {/* no v8i16 */}, NVPTX::STV_i32_v8, {/* no v8i64 */});
break;
}
@@ -1550,21 +1532,18 @@ bool NVPTXDAGToDAGISel::tryLoadParam(SDNode *Node) {
case 1:
Opcode = pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy,
NVPTX::LoadParamMemI8, NVPTX::LoadParamMemI16,
- NVPTX::LoadParamMemI32, NVPTX::LoadParamMemI64,
- NVPTX::LoadParamMemF32, NVPTX::LoadParamMemF64);
+ NVPTX::LoadParamMemI32, NVPTX::LoadParamMemI64);
break;
case 2:
Opcode =
pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy, NVPTX::LoadParamMemV2I8,
NVPTX::LoadParamMemV2I16, NVPTX::LoadParamMemV2I32,
- NVPTX::LoadParamMemV2I64, NVPTX::LoadParamMemV2F32,
- NVPTX::LoadParamMemV2F64);
+ NVPTX::LoadParamMemV2I64);
break;
case 4:
Opcode = pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy,
NVPTX::LoadParamMemV4I8, NVPTX::LoadParamMemV4I16,
- NVPTX::LoadParamMemV4I32, {/* no v4i64 */},
- NVPTX::LoadParamMemV4F32, {/* no v4f64 */});
+ NVPTX::LoadParamMemV4I32, {/* no v4i64 */});
break;
}
if (!Opcode)
@@ -1628,8 +1607,7 @@ bool NVPTXDAGToDAGISel::tryStoreRetval(SDNode *N) {
case 1:
Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
NVPTX::StoreRetvalI8, NVPTX::StoreRetvalI16,
- NVPTX::StoreRetvalI32, NVPTX::StoreRetvalI64,
- NVPTX::StoreRetvalF32, NVPTX::StoreRetvalF64);
+ NVPTX::StoreRetvalI32, NVPTX::StoreRetvalI64);
if (Opcode == NVPTX::StoreRetvalI8) {
// Fine tune the opcode depending on the size of the operand.
// This helps to avoid creating redundant COPY instructions in
@@ -1649,14 +1627,12 @@ bool NVPTXDAGToDAGISel::tryStoreRetval(SDNode *N) {
case 2:
Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
NVPTX::StoreRetvalV2I8, NVPTX::StoreRetvalV2I16,
- NVPTX::StoreRetvalV2I32, NVPTX::StoreRetvalV2I64,
- NVPTX::StoreRetvalV2F32, NVPTX::StoreRetvalV2F64);
+ NVPTX::StoreRetvalV2I32, NVPTX::StoreRetvalV2I64);
break;
case 4:
Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
NVPTX::StoreRetvalV4I8, NVPTX::StoreRetvalV4I16,
- NVPTX::StoreRetvalV4I32, {/* no v4i64 */},
- NVPTX::StoreRetvalV4F32, {/* no v4f64 */});
+ NVPTX::StoreRetvalV4I32, {/* no v4i64 */});
break;
}
if (!Opcode)
@@ -1827,14 +1803,12 @@ bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) {
// Use immediate version of store param
Opcode = pickOpcodeForVT(MemTy, NVPTX::StoreParamI8_i,
NVPTX::StoreParamI16_i, NVPTX::StoreParamI32_i,
- NVPTX::StoreParamI64_i, NVPTX::StoreParamF32_i,
- NVPTX::StoreParamF64_i);
+ NVPTX::StoreParamI64_i);
} else
Opcode =
pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
NVPTX::StoreParamI8_r, NVPTX::StoreParamI16_r,
- NVPTX::StoreParamI32_r, NVPTX::StoreParamI64_r,
- NVPTX::StoreParamF32_r, NVPTX::StoreParamF64_r);
+ NVPTX::StoreParamI32_r, NVPTX::StoreParamI64_r);
if (Opcode == NVPTX::StoreParamI8_r) {
// Fine tune the opcode depending on the size of the operand.
// This helps to avoid creating redundant COPY instructions in
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index da50c1fa68b69..883a2ddf80d4c 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -208,6 +208,9 @@ class ValueToRegClass<ValueType T> {
class OneUse1<SDPatternOperator operator>
: PatFrag<(ops node:$A), (operator node:$A), [{ return N->hasOneUse(); }]>;
+class OneUse2<SDPatternOperator operator>
+ : PatFrag<(ops node:$A, node:$B), (operator node:$A, node:$B), [{ return N->hasOneUse(); }]>;
+
class fpimm_pos_inf<ValueType vt>
: FPImmLeaf<vt, [{ return Imm.isPosInfinity(); }]>;
@@ -282,22 +285,20 @@ class BasicNVPTXInst<dag outs, dag insv, string asmstr, list<dag> pattern = []>
multiclass I3Inst<string op_str, SDPatternOperator op_node, RegTyInfo t,
bit commutative, list<Predicate> requires = []> {
- defvar asmstr = op_str # " \t$dst, $a, $b;";
-
def rr :
- NVPTXInst<(outs t.RC:$dst), (ins t.RC:$a, t.RC:$b),
- asmstr,
+ BasicNVPTXInst<(outs t.RC:$dst), (ins t.RC:$a, t.RC:$b),
+ op_str,
[(set t.Ty:$dst, (op_node t.Ty:$a, t.Ty:$b))]>,
Requires<requires>;
def ri :
- NVPTXInst<(outs t.RC:$dst), (ins t.RC:$a, t.Imm:$b),
- asmstr,
+ BasicNVPTXInst<(outs t.RC:$dst), (ins t.RC:$a, t.Imm:$b),
+ op_str,
[(set t.Ty:$dst, (op_node t.Ty:$a, (t.Ty imm:$b)))]>,
Requires<requires>;
if !not(commutative) then
def ir :
- NVPTXInst<(outs t.RC:$dst), (ins t.Imm:$a, t.RC:$b),
- asmstr,
+ BasicNVPTXInst<(outs t.RC:$dst), (ins t.Imm:$a, t.RC:$b),
+ op_str,
[(set t.Ty:$dst, (op_node (t.Ty imm:$a), t.Ty:$b))]>,
Requires<requires>;
}
@@ -310,8 +311,8 @@ multiclass I3<string op_str, SDPatternOperator op_node, bit commutative> {
}
class I16x2<string OpcStr, SDNode OpNode> :
- NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
- !strconcat(OpcStr, "16x2 \t$dst, $a, $b;"),
+ BasicNVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
+ OpcStr # "16x2",
[(set v2i16:$dst, (OpNode v2i16:$a, v2i16:$b))]>,
Requires<[hasPTX<80>, hasSM<90>]>;
@@ -332,74 +333,74 @@ multiclass ADD_SUB_INT_CARRY<string op_str, SDNode op_node, bit commutative> {
multiclass FMINIMUMMAXIMUM<string OpcStr, bit NaN, SDNode OpNode> {
if !not(NaN) then {
def f64rr :
- NVPTXInst<(outs Float64Regs:$dst),
+ BasicNVPTXInst<(outs Float64Regs:$dst),
(ins Float64Regs:$a, Float64Regs:$b),
- !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"),
+ OpcStr # ".f64",
[(set f64:$dst, (OpNode f64:$a, f64:$b))]>;
def f64ri :
- NVPTXInst<(outs Float64Regs:$dst),
+ BasicNVPTXInst<(outs Float64Regs:$dst),
(ins Float64Regs:$a, f64imm:$b),
- !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"),
+ OpcStr # ".f64",
[(set f64:$dst, (OpNode f64:$a, fpimm:$b))]>;
}
def f32rr_ftz :
- NVPTXInst<(outs Float32Regs:$dst),
+ BasicNVPTXInst<(outs Float32Regs:$dst),
(ins Float32Regs:$a, Float32Regs:$b),
- !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"),
+ OpcStr # ".ftz.f32",
[(set f32:$dst, (OpNode f32:$a, f32:$b))]>,
Requires<[doF32FTZ]>;
def f32ri_ftz :
- NVPTXInst<(outs Float32Regs:$dst),
+ BasicNVPTXInst<(outs Float32Regs:$dst),
(ins Float32Regs:$a, f32imm:$b),
- !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"),
+ OpcStr # ".ftz.f32",
[(set f32:$dst, (OpNode f32:$a, fpimm:$b))]>,
Requires<[doF32FTZ]>;
def f32rr :
- NVPTXInst<(outs Float32Regs:$dst),
+ BasicNVPTXInst<(outs Float32Regs:$dst),
(ins Float32Regs:$a, Float32Regs:$b),
- !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"),
+ OpcStr # ".f32",
[(set f32:$dst, (OpNode f32:$a, f32:$b))]>;
def f32ri :
- NVPTXInst<(outs Float32Regs:$dst),
+ BasicNVPTXInst<(outs Float32Regs:$dst),
(ins Float32Regs:$a, f32imm:$b),
- !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"),
+ OpcStr # ".f32",
[(set f32:$dst, (OpNode f32:$a, fpimm:$b))]>;
def f16rr_ftz :
- NVPTXInst<(outs Int16Regs:$dst),
+ BasicNVPTXInst<(outs Int16Regs:$dst),
(ins Int16Regs:$a, Int16Regs:$b),
- !strconcat(OpcStr, ".ftz.f16 \t$dst, $a, $b;"),
+ OpcStr # ".ftz.f16",
[(set f16:$dst, (OpNode f16:$a, f16:$b))]>,
Requires<[useFP16Math, doF32FTZ]>;
def f16rr :
- NVPTXInst<(outs Int16Regs:$dst),
+ BasicNVPTXInst<(outs Int16Regs:$dst),
(ins Int16Regs:$a, Int16Regs:$b),
- !strconcat(OpcStr, ".f16 \t$dst, $a, $b;"),
+ OpcStr # ".f16",
[(set f16:$dst, (OpNode f16:$a, f16:$b))]>,
Requires<[useFP16Math, hasSM<80>, hasPTX<70>]>;
def f16x2rr_ftz :
- NVPTXInst<(outs Int32Regs:$dst),
+ BasicNVPTXInst<(outs Int32Regs:$dst),
(ins Int32Regs:$a, Int32Regs:$b),
- !strconcat(OpcStr, ".ftz.f16x2 \t$dst, $a, $b;"),
+ OpcStr # ".ftz.f16x2",
[(set v2f16:$dst, (OpNode v2f16:$a, v2f16:$b))]>,
Requires<[useFP16Math, hasSM<80>, hasPTX<70>, doF32FTZ]>;
def f16x2rr :
- NVPTXInst<(outs Int32Regs:$dst),
+ BasicNVPTXInst<(outs Int32Regs:$dst),
(ins Int32Regs:$a, Int32Regs:$b),
- !strconcat(OpcStr, ".f16x2 \t$dst, $a, $b;"),
+ OpcStr # ".f16x2",
[(set v2f16:$dst, (OpNode v2f16:$a, v2f16:$b))]>,
Requires<[useFP16Math, hasSM<80>, hasPTX<70>]>;
def bf16rr :
- NVPTXInst<(outs Int16Regs:$dst),
+ BasicNVPTXInst<(outs Int16Regs:$dst),
(ins Int16Regs:$a, Int16Regs:$b),
- !strconcat(OpcStr, ".bf16 \t$dst, $a, $b;"),
+ OpcStr # ".bf16",
[(set bf16:$dst, (OpNode bf16:$a, bf16:$b))]>,
Requires<[hasBF16Math, hasSM<80>, hasPTX<70>]>;
def bf16x2rr :
- NVPTXInst<(outs Int32Regs:$dst),
+ BasicNVPTXInst<(outs Int32Regs:$dst),
(ins Int32Regs:$a, Int32Regs:$b),
- !strconcat(OpcStr, ".bf16x2 \t$dst, $a, $b;"),
+ OpcStr # ".bf16x2",
[(set v2bf16:$dst, (OpNode v2bf16:$a, v2bf16:$b))]>,
Requires<[hasBF16Math, hasSM<80>, hasPTX<70>]>;
}
@@ -415,74 +416,74 @@ multiclass FMINIMUMMAXIMUM<string OpcStr, bit NaN, SDNode OpNode> {
// just like the non ".rn" op, but prevents ptxas from creating FMAs.
multiclass F3<string op_str, SDPatternOperator op_pat> {
def f64rr :
- NVPTXInst<(outs Float64Regs:$dst),
+ BasicNVPTXInst<(outs Float64Regs:$dst),
(ins Float64Regs:$a, Float64Regs:$b),
- op_str # ".f64 \t$dst, $a, $b;",
+ op_str # ".f64",
[(set f64:$dst, (op_pat f64:$a, f64:$b))]>;
def f64ri :
- NVPTXInst<(outs Float64Regs:$dst),
+ BasicNVPTXInst<(outs Float64Regs:$dst),
(ins Float64Regs:$a, f64imm:$b),
- op_str # ".f64 \t$dst, $a, ...
[truncated]
``````````
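
For readers skimming the truncated diff: the heart of the ISel cleanup is that `pickOpcodeForVT` drops its `Opcode_f32`/`Opcode_f64` parameters. With the float register classes removed, f32 and f64 values are carried in the same registers as i32 and i64, so opcode selection keys only on bit width and the float cases fall through to the integer slots. Below is a minimal standalone sketch of that mapping (the `Slot` enum and `slotForBitWidth` name are illustrative, not part of the patch):

```cpp
#include <optional>

// Illustrative stand-in for the four remaining opcode parameters of
// pickOpcodeForVT (i8/i16/i32/i64); not an LLVM type.
enum class Slot { I8, I16, I32, I64 };

// Sketch of the collapsed selection logic: every type, float or integer,
// scalar or packed, maps to the opcode slot matching its total bit width.
std::optional<Slot> slotForBitWidth(unsigned Bits) {
  switch (Bits) {
  case 1:                     // i1 shares the i8 opcodes
  case 8:  return Slot::I8;
  case 16: return Slot::I16;  // i16, f16, bf16
  case 32: return Slot::I32;  // i32, f32, v2f16, v2bf16, v2i16, v4i8
  case 64: return Slot::I64;  // i64, f64
  default: return std::nullopt;  // no opcode for this width
  }
}
```

`slotForBitWidth(32)` and `slotForBitWidth(64)` return the same slot a float of that width now uses, which is why every `NVPTX::*_f32`/`*_f64` opcode in the switch-heavy callers above could simply be deleted.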
https://github.com/llvm/llvm-project/pull/141711