[llvm] 8c1b516 - AMDGPU: Implement s_wait_asynccnt and s_wait_tensorcnt for gfx1250 (#148292)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 11 15:07:25 PDT 2025
Author: Changpeng Fang
Date: 2025-07-11T15:07:21-07:00
New Revision: 8c1b5169484533a41d6a05603315a092c364975d
URL: https://github.com/llvm/llvm-project/commit/8c1b5169484533a41d6a05603315a092c364975d
DIFF: https://github.com/llvm/llvm-project/commit/8c1b5169484533a41d6a05603315a092c364975d.diff
LOG: AMDGPU: Implement s_wait_asynccnt and s_wait_tensorcnt for gfx1250 (#148292)
Co-authored-by: Stanislav Mekhanoshin <Stanislav.Mekhanoshin at amd.com>
Co-authored-by: Vang Thao <Vang.Thao at amd.com>
Added:
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.wait.gfx1250.ll
Modified:
clang/include/clang/Basic/BuiltinsAMDGPU.def
clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl
llvm/include/llvm/IR/IntrinsicsAMDGPU.td
llvm/lib/Target/AMDGPU/SOPInstructions.td
llvm/test/MC/AMDGPU/gfx1250_asm_sopp.s
llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_sopp.txt
Removed:
################################################################################
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index a5ee8013adff6..4d371a9f7d6db 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -665,6 +665,9 @@ TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_barrier_arrive_rtn_b64, "LiLi*3Li", "n
TARGET_BUILTIN(__builtin_amdgcn_s_setprio_inc_wg, "vIs", "n", "setprio-inc-wg-inst")
TARGET_BUILTIN(__builtin_amdgcn_s_monitor_sleep, "vIs", "n", "gfx1250-insts")
+TARGET_BUILTIN(__builtin_amdgcn_s_wait_asynccnt, "vIUs", "n", "gfx1250-insts")
+TARGET_BUILTIN(__builtin_amdgcn_s_wait_tensorcnt, "vIUs", "n", "gfx1250-insts")
+
TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_fp8, "hiIi", "nc", "gfx1250-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_bf8, "hiIi", "nc", "gfx1250-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f16_fp8, "V2hs", "nc", "gfx1250-insts")
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
index 421099d3876e3..a1b91d0cc38dc 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
@@ -24,6 +24,24 @@ void test_s_monitor_sleep() {
__builtin_amdgcn_s_monitor_sleep(10);
}
+// CHECK-LABEL: @test_s_wait_asynccnt(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: call void @llvm.amdgcn.s.wait.asynccnt(i16 0)
+// CHECK-NEXT: ret void
+//
+void test_s_wait_asynccnt() {
+ __builtin_amdgcn_s_wait_asynccnt(0);
+}
+
+// CHECK-LABEL: @test_s_wait_tensorcnt(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: call void @llvm.amdgcn.s.wait.tensorcnt(i16 0)
+// CHECK-NEXT: ret void
+//
+void test_s_wait_tensorcnt() {
+ __builtin_amdgcn_s_wait_tensorcnt(0);
+}
+
// CHECK-LABEL: @test_cvt_f16_fp8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl
index 7494c4f984353..9711b3bdded6b 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl
@@ -12,6 +12,14 @@ void test_s_monitor_sleep(short a) {
__builtin_amdgcn_s_monitor_sleep(a); // expected-error {{'__builtin_amdgcn_s_monitor_sleep' must be a constant integer}}
}
+void test_s_wait_asynccnt(short a) {
+ __builtin_amdgcn_s_wait_asynccnt(a); // expected-error {{'__builtin_amdgcn_s_wait_asynccnt' must be a constant integer}}
+}
+
+void test_s_wait_tensorcnt(short a) {
+ __builtin_amdgcn_s_wait_tensorcnt(a); // expected-error {{'__builtin_amdgcn_s_wait_tensorcnt' must be a constant integer}}
+}
+
void test__builtin_amdgcn_cvt_f16_fp8(int a, int b) {
__builtin_amdgcn_cvt_f16_fp8(a, b); // expected-error {{'__builtin_amdgcn_cvt_f16_fp8' must be a constant integer}}
}
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 16885f331e9dd..8016757cf0f3c 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -3510,6 +3510,18 @@ def int_amdgcn_ashr_pk_u8_i32 : ClangBuiltin<"__builtin_amdgcn_ashr_pk_u8_i32">,
// gfx1250 intrinsics
// ===----------------------------------------------------------------------===//
+// Async waits decrement ASYNCcnt and tensor waits decrement TENSORcnt which is
+// modeled as InaccessibleMem.
+class AMDGPUWaitAsyncIntrinsic :
+ Intrinsic<[], [llvm_i16_ty],
+ [IntrInaccessibleMemOnly, ImmArg<ArgIndex<0>>, IntrWillReturn, IntrNoCallback,
+ IntrNoFree]>;
+
+def int_amdgcn_s_wait_asynccnt :
+ ClangBuiltin<"__builtin_amdgcn_s_wait_asynccnt">, AMDGPUWaitAsyncIntrinsic;
+def int_amdgcn_s_wait_tensorcnt :
+ ClangBuiltin<"__builtin_amdgcn_s_wait_tensorcnt">, AMDGPUWaitAsyncIntrinsic;
+
def int_amdgcn_ds_atomic_async_barrier_arrive_b64 :
ClangBuiltin<"__builtin_amdgcn_ds_atomic_async_barrier_arrive_b64">,
Intrinsic<[], [local_ptr_ty],
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index c7c4276e0e252..2472b76fcf02c 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -1764,6 +1764,27 @@ let OtherPredicates = [HasExportInsts] in
[(int_amdgcn_s_wait_kmcnt timm:$simm16)]>;
} // End SubtargetPredicate = isGFX12Plus, hasSideEffects = 1
+let SubtargetPredicate = isGFX1250Plus, hasSideEffects = 1 in {
+ def S_WAIT_ASYNCCNT :
+ SOPP_Pseudo<"s_wait_asynccnt", (ins s16imm:$simm16), "$simm16",
+ [(int_amdgcn_s_wait_asynccnt timm:$simm16)]> {
+ let mayLoad = 1;
+ let mayStore = 1;
+ let maybeAtomic = 0;
+ let Uses = [ASYNCcnt];
+ let Defs = [ASYNCcnt];
+ }
+ def S_WAIT_TENSORCNT :
+ SOPP_Pseudo<"s_wait_tensorcnt", (ins s16imm:$simm16), "$simm16",
+ [(int_amdgcn_s_wait_tensorcnt timm:$simm16)]> {
+ let mayLoad = 1;
+ let mayStore = 1;
+ let maybeAtomic = 0;
+ let Uses = [TENSORcnt];
+ let Defs = [TENSORcnt];
+ }
+} // End SubtargetPredicate = isGFX1250Plus, hasSideEffects = 1
+
let SubtargetPredicate = HasWaitXcnt, hasSideEffects = 1 in {
def S_WAIT_XCNT :
SOPP_Pseudo<"s_wait_xcnt", (ins s16imm:$simm16), "$simm16">;
@@ -2609,6 +2630,8 @@ defm S_WAIT_STORECNT_DSCNT : SOPP_Real_32_gfx12<0x049>;
//===----------------------------------------------------------------------===//
defm S_SETPRIO_INC_WG : SOPP_Real_32_gfx12<0x03e>;
defm S_WAIT_XCNT : SOPP_Real_32_gfx12<0x045>;
+defm S_WAIT_ASYNCCNT : SOPP_Real_32_gfx12<0x04a>;
+defm S_WAIT_TENSORCNT : SOPP_Real_32_gfx12<0x04b>;
//===----------------------------------------------------------------------===//
// SOPP - GFX11, GFX12.
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.wait.gfx1250.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.wait.gfx1250.ll
new file mode 100644
index 0000000000000..2173d07baa57e
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.wait.gfx1250.ll
@@ -0,0 +1,24 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck %s -check-prefix=GFX12
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck %s -check-prefix=GFX12
+
+define amdgpu_ps void @test_asynccnt() {
+; GFX12-LABEL: test_asynccnt:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_asynccnt 0x0
+; GFX12-NEXT: s_endpgm
+ call void @llvm.amdgcn.s.wait.asynccnt(i16 0)
+ ret void
+}
+
+define amdgpu_ps void @test_tensorcnt() {
+; GFX12-LABEL: test_tensorcnt:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_tensorcnt 0x0
+; GFX12-NEXT: s_endpgm
+ call void @llvm.amdgcn.s.wait.tensorcnt(i16 0)
+ ret void
+}
+
+declare void @llvm.amdgcn.s.wait.asynccnt(i16)
+declare void @llvm.amdgcn.s.wait.tensorcnt(i16)
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_sopp.s b/llvm/test/MC/AMDGPU/gfx1250_asm_sopp.s
index 6ebc17468eed6..234c2ed0de793 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_sopp.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_sopp.s
@@ -1,6 +1,26 @@
// RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1250 %s | FileCheck --check-prefix=GFX1250 %s
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -show-encoding %s 2>&1 | FileCheck --check-prefixes=GFX12-ERR --implicit-check-not=error: -strict-whitespace %s
+s_wait_asynccnt 0x1234
+// GFX1250: [0x34,0x12,0xca,0xbf]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_wait_asynccnt 0xc1d1
+// GFX1250: [0xd1,0xc1,0xca,0xbf]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_wait_tensorcnt 0x0
+// GFX1250: [0x00,0x00,0xcb,0xbf]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_wait_tensorcnt 0x1
+// GFX1250: [0x01,0x00,0xcb,0xbf]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_wait_tensorcnt 0x3
+// GFX1250: [0x03,0x00,0xcb,0xbf]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
s_wait_xcnt 0x0
// GFX1250: [0x00,0x00,0xc5,0xbf]
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_sopp.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_sopp.txt
index 220f9e5084f0e..e7026df3c0e2b 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_sopp.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_sopp.txt
@@ -1,5 +1,20 @@
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250 %s
+# GFX1250: s_wait_asynccnt 0x1234 ; encoding: [0x34,0x12,0xca,0xbf]
+0x34,0x12,0xca,0xbf
+
+# GFX1250: s_wait_asynccnt 0xc1d1 ; encoding: [0xd1,0xc1,0xca,0xbf]
+0xd1,0xc1,0xca,0xbf
+
+# GFX1250: s_wait_tensorcnt 0x0 ; encoding: [0x00,0x00,0xcb,0xbf]
+0x00,0x00,0xcb,0xbf
+
+# GFX1250: s_wait_tensorcnt 0x1 ; encoding: [0x01,0x00,0xcb,0xbf]
+0x01,0x00,0xcb,0xbf
+
+# GFX1250: s_wait_tensorcnt 0x3 ; encoding: [0x03,0x00,0xcb,0xbf]
+0x03,0x00,0xcb,0xbf
+
# GFX1250: s_wait_xcnt 0x0 ; encoding: [0x00,0x00,0xc5,0xbf]
0x00,0x00,0xc5,0xbf
More information about the llvm-commits
mailing list