[llvm] [NVPTX] Support BFloat Store Parameter (PR #137074)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 23 15:21:37 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-nvptx
Author: Steffi Stumpos (stumpOS)
<details>
<summary>Changes</summary>
Before this patch, the instruction selector assumed that if the Memory Type is not {f16, v2f16, f32, f64} then the node type must be a ConstantSDNode when in fact if the memory type is bf16 then the node type is ConstantFPSDNode.
---
Full diff: https://github.com/llvm/llvm-project/pull/137074.diff
2 Files Affected:
- (modified) llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp (+4-4)
- (modified) llvm/test/CodeGen/NVPTX/st-param-imm.ll (+24)
``````````diff
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index ec1f969494cd1..e74c8828aaf1b 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -583,7 +583,7 @@ getOperationOrderings(MemSDNode *N, const NVPTXSubtarget *Subtarget) {
// |------------------------------------------------------|-------------------------------|
// | cuda::atomic_load | fence.sc.<scope>; |
// | (memory_order_seq_cst, cuda::thread_scope_<scope>) | ld.acquire.<scope>; |
- // |------------------------------------------------------|-------------------------------|
+ // |------------------------------------------------------|-------------------------------|
// | cuda::atomic_store | fence.sc.<scope>; |
// | (memory_order_seq_cst, cuda::thread_scope_<scope>) | st.release.<scope>; |
// |------------------------------------------------------|-------------------------------|
@@ -1852,7 +1852,7 @@ bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) {
case 1: {
MVT::SimpleValueType MemTy = Mem->getMemoryVT().getSimpleVT().SimpleTy;
SDValue Imm = Ops[0];
- if (MemTy != MVT::f16 && MemTy != MVT::v2f16 &&
+ if (MemTy != MVT::f16 && MemTy != MVT::v2f16 && MemTy != MVT::bf16 &&
(isa<ConstantSDNode>(Imm) || isa<ConstantFPSDNode>(Imm))) {
// Convert immediate to target constant
if (MemTy == MVT::f32 || MemTy == MVT::f64) {
@@ -2808,8 +2808,8 @@ void NVPTXDAGToDAGISel::SelectCpAsyncBulkPrefetchL2(SDNode *N) {
SDLoc DL(N);
SmallVector<SDValue, 4> Ops(N->ops().slice(2, NumArgs));
Ops.push_back(N->getOperand(0)); // Chain operand
-
- unsigned Opcode = IsCacheHint
+
+ unsigned Opcode = IsCacheHint
? NVPTX::CP_ASYNC_BULK_PREFETCH_CH
: NVPTX::CP_ASYNC_BULK_PREFETCH;
ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops));
diff --git a/llvm/test/CodeGen/NVPTX/st-param-imm.ll b/llvm/test/CodeGen/NVPTX/st-param-imm.ll
index ab1447607ab65..d5463b04b3b72 100644
--- a/llvm/test/CodeGen/NVPTX/st-param-imm.ll
+++ b/llvm/test/CodeGen/NVPTX/st-param-imm.ll
@@ -2000,3 +2000,27 @@ declare void @call_v4_i8(%struct.char4 alignstack(4))
declare void @call_v4_i16(%struct.short4 alignstack(8))
declare void @call_v4_i32(%struct.int4 alignstack(16))
declare void @call_v4_f32(%struct.float4 alignstack(16))
+
+define void @st_param_bfloat() {
+; CHECK-LABEL: st_param_bfloat(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:// %bb.0:
+; CHECK-NEXT: mov.b16 %rs1, 0x4100;
+; CHECK-NEXT: { // callseq 83, 0
+; CHECK-NEXT: .param .align 2 .b8 param0[2];
+; CHECK-NEXT: st.param.b16 [param0], %rs1;
+; CHECK-NEXT: call.uni
+; CHECK-NEXT: call_bfloat,
+; CHECK-NEXT: (
+; CHECK-NEXT: param0
+; CHECK-NEXT: );
+; CHECK-NEXT: } // callseq 83
+; CHECK-NEXT: ret;
+ %five = bitcast i16 16640 to bfloat
+ call void @call_bfloat(bfloat %five)
+ ret void
+}
+
+declare void @call_bfloat(bfloat)
``````````
</details>
https://github.com/llvm/llvm-project/pull/137074
More information about the llvm-commits
mailing list