[llvm] [NVPTX] Add intrinsics for wgmma.fence PTX instructions (PR #120523)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 18 21:23:59 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-nvptx
Author: Srinivasa R (Wolfram70)
<details>
<summary>Changes</summary>
This PR adds NVVM intrinsics and NVPTX codegen for:
- [wgmma.fence.sync.aligned](https://docs.nvidia.com/cuda/parallel-thread-execution/#asynchronous-warpgroup-level-matrix-instructions-wgmma-fence)
- [wgmma.commit_group.sync.aligned](https://docs.nvidia.com/cuda/parallel-thread-execution/#asynchronous-warpgroup-level-matrix-instructions-wgmma-commit-group)
- [wgmma.wait_group.sync.aligned](https://docs.nvidia.com/cuda/parallel-thread-execution/#asynchronous-warpgroup-level-matrix-instructions-wgmma-wait-group)
---
Full diff: https://github.com/llvm/llvm-project/pull/120523.diff
5 Files Affected:
- (modified) llvm/include/llvm/IR/IntrinsicsNVVM.td (+15)
- (modified) llvm/lib/Target/NVPTX/NVPTXIntrinsics.td (+12)
- (added) llvm/test/CodeGen/NVPTX/wgmma-commit-group.ll (+15)
- (added) llvm/test/CodeGen/NVPTX/wgmma-fence.ll (+15)
- (added) llvm/test/CodeGen/NVPTX/wgmma-wait-group.ll (+15)
``````````diff
diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td
index 9834dbb70d4c1f..6f64311cc06c03 100644
--- a/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -4805,6 +4805,21 @@ def int_nvvm_redux_sync_or : ClangBuiltin<"__nvvm_redux_sync_or">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
[IntrConvergent, IntrInaccessibleMemOnly, IntrNoCallback]>;
+//
+// WGMMA instructions
+//
+// wgmma.fence.sync.aligned;
+def int_nvvm_wgmma_fence_sync_aligned
+ : Intrinsic<[], [], [IntrConvergent], "llvm.nvvm.wgmma.fence.sync.aligned">;
+
+// wgmma.commit_group.sync.aligned;
+def int_nvvm_wgmma_commit_group_sync_aligned
+ : Intrinsic<[], [], [IntrConvergent], "llvm.nvvm.wgmma.commit_group.sync.aligned">;
+
+// wgmma.wait_group.sync.aligned N;
+def int_nvvm_wgmma_wait_group_sync_aligned
+ : Intrinsic<[], [llvm_i32_ty], [IntrConvergent, ImmArg<ArgIndex<0>>], "llvm.nvvm.wgmma.wait_group.sync.aligned">;
+
//
// WMMA instructions
//
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index 256161d5d79c77..d83985b46abe56 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -7484,4 +7484,16 @@ defm INT_SET_MAXNREG_DEC : SET_MAXNREG<"dec", int_nvvm_setmaxnreg_dec_sync_align
} // isConvergent
+//
+// WGMMA instructions
+//
+def INT_NVVM_WGMMA_FENCE_SYNC_ALIGNED : NVPTXInst<(outs), (ins), "wgmma.fence.sync.aligned;",
+ [(int_nvvm_wgmma_fence_sync_aligned)]>, Requires<[hasSM90a, hasPTX<80>]>;
+
+def INT_NVVM_WGMMA_COMMIT_GROUP_SYNC_ALIGNED : NVPTXInst<(outs), (ins), "wgmma.commit_group.sync.aligned;",
+ [(int_nvvm_wgmma_commit_group_sync_aligned)]>, Requires<[hasSM90a, hasPTX<80>]>;
+
+def INT_NVVM_WGMMA_WAIT_GROUP_SYNC_ALIGNED : NVPTXInst<(outs), (ins i32imm:$n), "wgmma.wait_group.sync.aligned \t$n;",
+ [(int_nvvm_wgmma_wait_group_sync_aligned timm:$n)]>, Requires<[hasSM90a, hasPTX<80>]>;
+
def INT_EXIT : NVPTXInst<(outs), (ins), "exit;", [(int_nvvm_exit)]>;
diff --git a/llvm/test/CodeGen/NVPTX/wgmma-commit-group.ll b/llvm/test/CodeGen/NVPTX/wgmma-commit-group.ll
new file mode 100644
index 00000000000000..d2a41f44b28776
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/wgmma-commit-group.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_90a -mattr=+ptx80 | FileCheck %s
+; RUN: %if ptxas-12.0 %{ llc < %s -march=nvptx64 -mcpu=sm_90a -mattr=+ptx80 | %ptxas-verify -arch=sm_90a %}
+
+target triple = "nvptx64-nvidia-cuda"
+
+declare void @llvm.nvvm.wgmma.commit_group.sync.aligned()
+
+define void @test_wgmma_commit_group_sync_aligned() {
+ ; CHECK-LABEL: test_wgmma_commit_group_sync_aligned(
+ ; CHECK: // %bb.0:
+ ; CHECK-NEXT: wgmma.commit_group.sync.aligned;
+ ; CHECK-NEXT: ret;
+ call void @llvm.nvvm.wgmma.commit_group.sync.aligned()
+ ret void
+}
\ No newline at end of file
diff --git a/llvm/test/CodeGen/NVPTX/wgmma-fence.ll b/llvm/test/CodeGen/NVPTX/wgmma-fence.ll
new file mode 100644
index 00000000000000..24075ba93786a6
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/wgmma-fence.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_90a -mattr=+ptx80 | FileCheck %s
+; RUN: %if ptxas-12.0 %{ llc < %s -march=nvptx64 -mcpu=sm_90a -mattr=+ptx80 | %ptxas-verify -arch=sm_90a %}
+
+target triple = "nvptx64-nvidia-cuda"
+
+declare void @llvm.nvvm.wgmma.fence.sync.aligned()
+
+define void @test_wgmma_fence_sync_aligned() {
+ ; CHECK-LABEL: test_wgmma_fence_sync_aligned(
+ ; CHECK: // %bb.0:
+ ; CHECK-NEXT: wgmma.fence.sync.aligned;
+ ; CHECK-NEXT: ret;
+ call void @llvm.nvvm.wgmma.fence.sync.aligned()
+ ret void
+}
\ No newline at end of file
diff --git a/llvm/test/CodeGen/NVPTX/wgmma-wait-group.ll b/llvm/test/CodeGen/NVPTX/wgmma-wait-group.ll
new file mode 100644
index 00000000000000..ad314d2faa5fd1
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/wgmma-wait-group.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_90a -mattr=+ptx80 | FileCheck %s
+; RUN: %if ptxas-12.0 %{ llc < %s -march=nvptx64 -mcpu=sm_90a -mattr=+ptx80 | %ptxas-verify -arch=sm_90a %}
+
+target triple = "nvptx64-nvidia-cuda"
+
+declare void @llvm.nvvm.wgmma.wait_group.sync.aligned(i32)
+
+define void @test_wgmma_wait_group_sync_aligned() {
+ ; CHECK-LABEL: test_wgmma_wait_group_sync_aligned(
+ ; CHECK: // %bb.0:
+ ; CHECK-NEXT: wgmma.wait_group.sync.aligned 10;
+ ; CHECK-NEXT: ret;
+ call void @llvm.nvvm.wgmma.wait_group.sync.aligned(i32 10)
+ ret void
+}
\ No newline at end of file
``````````
</details>
https://github.com/llvm/llvm-project/pull/120523
More information about the llvm-commits
mailing list