[clang] [llvm] [Clang][AMDGPU] Expose buffer load lds as a clang builtin (PR #132048)
Juan Manuel Martinez CaamaƱo via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 19 08:19:14 PDT 2025
https://github.com/jmmartinez created https://github.com/llvm/llvm-project/pull/132048
CK is using either inline assembly or inline LLVM-IR builtins to generate buffer_load_dword lds instructions.
This patch exposes this instruction as a Clang builtin.
Limitations in this patch: these instructions are not available for gfx11/gfx12. I wasn't able to identify a target feature that we could use to predicate the builtin availability, in the same way as it is done for global_load_lds.
Related to SWDEV-519702 and SWDEV-518861
>From c89ffa43c68454ba07af248e345078d6a70b856b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Juan=20Manuel=20Martinez=20Caama=C3=B1o?= <juamarti at amd.com>
Date: Thu, 13 Mar 2025 10:00:28 +0100
Subject: [PATCH] [Clang][AMDGPU] Expose buffer load lds as a clang builtin
CK is using either inline assembly or inline llvm-ir builtins to
generate buffer_load_dword lds instructions.
This patch exposes this instruction as a builtin.
Related to SWDEV-519702 and SWDEV-518861
---
clang/include/clang/Basic/BuiltinsAMDGPU.def | 2 ++
clang/include/clang/Basic/DiagnosticSemaKinds.td | 4 ++--
clang/lib/Sema/SemaAMDGPU.cpp | 7 +++----
.../builtins-amdgcn-raw-buffer-load.cl | 9 +++++++++
...iltins-amdgcn-raw-ptr-buffer-load-lds-error.cl | 15 +++++++++++++++
llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 4 +++-
6 files changed, 34 insertions(+), 7 deletions(-)
create mode 100644 clang/test/SemaOpenCL/builtins-amdgcn-raw-ptr-buffer-load-lds-error.cl
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 44ef404aee72f..4117e8d67330b 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -162,6 +162,8 @@ BUILTIN(__builtin_amdgcn_raw_buffer_load_b64, "V2UiQbiiIi", "n")
BUILTIN(__builtin_amdgcn_raw_buffer_load_b96, "V3UiQbiiIi", "n")
BUILTIN(__builtin_amdgcn_raw_buffer_load_b128, "V4UiQbiiIi", "n")
+BUILTIN(__builtin_amdgcn_raw_ptr_buffer_load_lds, "vQbv*3IUiiiIiIi", "t")
+
//===----------------------------------------------------------------------===//
// Ballot builtins.
//===----------------------------------------------------------------------===//
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 1536a3b8c920a..e17d6b530141d 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -13132,6 +13132,6 @@ def err_acc_duplicate_bind
"permitted to refer to the same function">;
// AMDGCN builtins diagnostics
-def err_amdgcn_global_load_lds_size_invalid_value : Error<"invalid size value">;
-def note_amdgcn_global_load_lds_size_valid_value : Note<"size must be %select{1, 2, or 4|1, 2, 4, 12 or 16}0">;
+def err_amdgcn_load_lds_size_invalid_value : Error<"invalid size value">;
+def note_amdgcn_load_lds_size_valid_value : Note<"size must be %select{1, 2, or 4|1, 2, 4, 12 or 16}0">;
} // end of sema component.
diff --git a/clang/lib/Sema/SemaAMDGPU.cpp b/clang/lib/Sema/SemaAMDGPU.cpp
index a4d075dfd0768..7fec099374152 100644
--- a/clang/lib/Sema/SemaAMDGPU.cpp
+++ b/clang/lib/Sema/SemaAMDGPU.cpp
@@ -35,6 +35,7 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
Builtin::evaluateRequiredTargetFeatures("gfx950-insts", CallerFeatureMap);
switch (BuiltinID) {
+ case AMDGPU::BI__builtin_amdgcn_raw_ptr_buffer_load_lds:
case AMDGPU::BI__builtin_amdgcn_global_load_lds: {
constexpr const int SizeIdx = 2;
llvm::APSInt Size;
@@ -54,11 +55,9 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
[[fallthrough]];
}
default:
- Diag(ArgExpr->getExprLoc(),
- diag::err_amdgcn_global_load_lds_size_invalid_value)
+ Diag(ArgExpr->getExprLoc(), diag::err_amdgcn_load_lds_size_invalid_value)
<< ArgExpr->getSourceRange();
- Diag(ArgExpr->getExprLoc(),
- diag::note_amdgcn_global_load_lds_size_valid_value)
+ Diag(ArgExpr->getExprLoc(), diag::note_amdgcn_load_lds_size_valid_value)
<< HasGFX950Insts << ArgExpr->getSourceRange();
return true;
}
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-load.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-load.cl
index 3403b69e07e4b..5e3ed9027c17a 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-load.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-load.cl
@@ -170,3 +170,12 @@ v3u32 test_amdgcn_raw_ptr_buffer_load_b96_non_const_soffset(__amdgpu_buffer_rsrc
v4u32 test_amdgcn_raw_ptr_buffer_load_b128_non_const_soffset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
return __builtin_amdgcn_raw_buffer_load_b128(rsrc, /*offset=*/0, soffset, /*aux=*/0);
}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_lds(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) [[RSRC:%.*]], ptr addrspace(3) [[LDS:%.*]], i32 1, i32 [[OFFSET:%.*]], i32 [[SOFFSET:%.*]], i32 2, i32 3)
+// CHECK-NEXT: ret void
+//
+void test_amdgcn_raw_ptr_buffer_load_lds(__amdgpu_buffer_rsrc_t rsrc, __local void * lds, int offset, int soffset) {
+ __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, lds, 1, offset, soffset, 2, 3);
+}
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-raw-ptr-buffer-load-lds-error.cl b/clang/test/SemaOpenCL/builtins-amdgcn-raw-ptr-buffer-load-lds-error.cl
new file mode 100644
index 0000000000000..b7065ea2b6655
--- /dev/null
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-raw-ptr-buffer-load-lds-error.cl
@@ -0,0 +1,15 @@
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx90a -S -verify -o - %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx950 -S -verify -o - %s -DGFX950
+// REQUIRES: amdgpu-registered-target
+
+void test_amdgcn_raw_ptr_buffer_load_lds(__amdgpu_buffer_rsrc_t rsrc, __local void* lds, int offset, int soffset, int x) {
+ __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, lds, x, offset, soffset, 0, 0); //expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_load_lds' must be a constant integer}}
+ __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, lds, 4, offset, soffset, x, 0); //expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_load_lds' must be a constant integer}}
+ __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, lds, 4, offset, soffset, 0, x); //expected-error{{argument to '__builtin_amdgcn_raw_ptr_buffer_load_lds' must be a constant integer}}
+
+#ifdef GFX950
+ __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, lds, 3, offset, soffset, 0, 0); //expected-error{{invalid size value}} expected-note{{size must be 1, 2, 4, 12 or 16}}
+#else
+ __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, lds, 3, offset, soffset, 0, 0); //expected-error{{invalid size value}} expected-note{{size must be 1, 2, or 4}}
+#endif
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 307e4b8a01e5c..efec2ef8325c9 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -1863,7 +1863,9 @@ class AMDGPURawBufferLoadLDS : Intrinsic <
ImmArg<ArgIndex<6>>, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>;
def int_amdgcn_raw_buffer_load_lds : AMDGPURawBufferLoadLDS;
-class AMDGPURawPtrBufferLoadLDS : Intrinsic <
+class AMDGPURawPtrBufferLoadLDS :
+ ClangBuiltin<"__builtin_amdgcn_raw_ptr_buffer_load_lds">,
+ Intrinsic <
[],
[AMDGPUBufferRsrcTy, // rsrc(SGPR)
LLVMQualPointerType<3>, // LDS base offset
More information about the llvm-commits
mailing list