[clang] 7940888 - [AMDGPU] Intrinsic to expose s_wait_event for export ready
David Stuttard via cfe-commits
cfe-commits at lists.llvm.org
Mon Nov 28 03:28:38 PST 2022
Author: David Stuttard
Date: 2022-11-28T11:26:15Z
New Revision: 7940888c5987de2b5cbb4ec45b482df88e822f67
URL: https://github.com/llvm/llvm-project/commit/7940888c5987de2b5cbb4ec45b482df88e822f67
DIFF: https://github.com/llvm/llvm-project/commit/7940888c5987de2b5cbb4ec45b482df88e822f67.diff
LOG: [AMDGPU] Intrinsic to expose s_wait_event for export ready
Differential Revision: https://reviews.llvm.org/D138216
Added:
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.wait.event.ll
Modified:
clang/include/clang/Basic/BuiltinsAMDGPU.def
clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl
llvm/include/llvm/IR/IntrinsicsAMDGPU.td
llvm/lib/Target/AMDGPU/SOPInstructions.td
Removed:
################################################################################
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index d4d16d5a9563d..5e64f830fb850 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -261,6 +261,7 @@ TARGET_BUILTIN(__builtin_amdgcn_image_bvh_intersect_ray_lh, "V4UiWUifV4fV4hV4hV4
// TODO: This is a no-op in wave32. Should the builtin require wavefrontsize64?
TARGET_BUILTIN(__builtin_amdgcn_permlane64, "UiUi", "nc", "gfx11-insts")
+TARGET_BUILTIN(__builtin_amdgcn_s_wait_event_export_ready, "v", "n", "gfx11-insts")
//===----------------------------------------------------------------------===//
// WMMA builtins.
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl
index a4f2d610afa83..59a16900fb1a4 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl
@@ -37,3 +37,9 @@ void test_ds_bvh_stack_rtn(global uint2* out, uint addr, uint data, uint4 data1)
void test_permlane64(global uint* out, uint a) {
*out = __builtin_amdgcn_permlane64(a);
}
+
+// CHECK-LABEL: @test_s_wait_event_export_ready
+// CHECK: call void @llvm.amdgcn.s.wait.event.export.ready
+void test_s_wait_event_export_ready() {
+ __builtin_amdgcn_s_wait_event_export_ready();
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 8f05eb10920c7..3e9233b1f86f9 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -2067,6 +2067,10 @@ def int_amdgcn_wmma_bf16_16x16x16_bf16 : AMDGPUWmmaIntrinsicOPSEL<llvm_v16i16_ty
def int_amdgcn_wmma_i32_16x16x16_iu8 : AMDGPUWmmaIntrinsicIU<llvm_v4i32_ty, llvm_anyint_ty>;
def int_amdgcn_wmma_i32_16x16x16_iu4 : AMDGPUWmmaIntrinsicIU<llvm_v2i32_ty, llvm_anyint_ty>;
+def int_amdgcn_s_wait_event_export_ready :
+ ClangBuiltin<"__builtin_amdgcn_s_wait_event_export_ready">,
+ Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]
+>;
//===----------------------------------------------------------------------===//
// Deep learning intrinsics.
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 674da1f0ae4a5..ce0b0dfc48ced 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -1388,7 +1388,9 @@ let SubtargetPredicate = isGFX10Plus in {
let SubtargetPredicate = isGFX11Plus in {
def S_WAIT_EVENT : SOPP_Pseudo<"s_wait_event", (ins s16imm:$simm16),
- "$simm16">;
+ "$simm16"> {
+ let hasSideEffects = 1;
+ }
def S_DELAY_ALU : SOPP_Pseudo<"s_delay_alu", (ins DELAY_FLAG:$simm16),
"$simm16">;
} // End SubtargetPredicate = isGFX11Plus
@@ -1430,6 +1432,10 @@ def : GCNPat<
(S_SEXT_I32_I16 $src)
>;
+def : GCNPat <
+ (int_amdgcn_s_wait_event_export_ready),
+ (S_WAIT_EVENT (i16 0))
+>;
//===----------------------------------------------------------------------===//
// SOP2 Patterns
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.wait.event.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.wait.event.ll
new file mode 100644
index 0000000000000..3e95e4dec67a2
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.wait.event.ll
@@ -0,0 +1,15 @@
+; RUN: llc -global-isel=0 -march=amdgcn -verify-machineinstrs -mcpu=gfx1100 < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -global-isel -march=amdgcn -verify-machineinstrs -mcpu=gfx1100 < %s | FileCheck -check-prefix=GCN %s
+
+; GCN-LABEL: {{^}}test_wait_event:
+; GCN: s_wait_event 0x0
+
+define amdgpu_ps void @test_wait_event() #0 {
+entry:
+ call void @llvm.amdgcn.s.wait.event.export.ready() #0
+ ret void
+}
+
+declare void @llvm.amdgcn.s.wait.event.export.ready() #0
+
+attributes #0 = { nounwind }
More information about the cfe-commits mailing list