[clang] d995b2e - [Clang][AMDGPU] Accept builtins in lambda declarations (#135027)

Fri Apr 11 10:11:50 PDT 2025

Author: Juan Manuel Martinez Caamaño
Date: 2025-04-11T19:11:46+02:00
New Revision: d995b2ebdc1a312c8583351c13dac90e8e320a09

URL: https://github.com/llvm/llvm-project/commit/d995b2ebdc1a312c8583351c13dac90e8e320a09
DIFF: https://github.com/llvm/llvm-project/commit/d995b2ebdc1a312c8583351c13dac90e8e320a09.diff

LOG: [Clang][AMDGPU] Accept builtins in lambda declarations (#135027)

`Sema::getCurFunctionDecl(AllowLambda = false)` returns a nullptr when
the lambda declaration is outside a function (for example, when
assigning a lambda to a static constexpr variable).

This triggered an assertion in
`SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall`.

Using `Sema::getCurFunctionDecl(AllowLambda = true)` returns the
declaration of the enclosing lambda.

Stumbled with this issue when refactoring some code in CK.

Added: 
    clang/test/SemaHIP/amdgpu-builtin-in-lambda-with-unsupported-attribute.hip
    clang/test/SemaHIP/amdgpu-builtin-in-lambda.hip

Modified: 
    clang/lib/Sema/SemaAMDGPU.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/Sema/SemaAMDGPU.cpp b/clang/lib/Sema/SemaAMDGPU.cpp
index 7fec099374152..a6366aceec2a6 100644

--- a/clang/lib/Sema/SemaAMDGPU.cpp
+++ b/clang/lib/Sema/SemaAMDGPU.cpp
@@ -27,7 +27,7 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
   // position of memory order and scope arguments in the builtin
   unsigned OrderIndex, ScopeIndex;
 
-  const auto *FD = SemaRef.getCurFunctionDecl();
+  const auto *FD = SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
   assert(FD && "AMDGPU builtins should not be used outside of a function");
   llvm::StringMap<bool> CallerFeatureMap;
   getASTContext().getFunctionFeatureMap(CallerFeatureMap, FD);

diff  --git a/clang/test/SemaHIP/amdgpu-builtin-in-lambda-with-unsupported-attribute.hip b/clang/test/SemaHIP/amdgpu-builtin-in-lambda-with-unsupported-attribute.hip
new file mode 100644
index 0000000000000..5b9223f2eaa3a
--- /dev/null
+++ b/clang/test/SemaHIP/amdgpu-builtin-in-lambda-with-unsupported-attribute.hip
@@ -0,0 +1,34 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -std=c++20 -triple amdgcn -target-cpu tahiti -emit-llvm -fcuda-is-device -verify=no-memrealtime -o - %s
+// RUN: %clang_cc1 -std=c++20 -triple amdgcn -target-cpu gfx950 -emit-llvm -fcuda-is-device -o - %s
+
+#define __device__ __attribute__((device))
+#define __shared__ __attribute__((shared))
+
+struct S {
+    static constexpr auto memrealtime_lambda = []() {
+        __builtin_amdgcn_s_memrealtime(); // no-memrealtime-error{{'__builtin_amdgcn_s_memrealtime' needs target feature s-memrealtime}}
+    };
+};
+
+__attribute__((target("s-memrealtime")))
+__device__ void test_target_dependant_builtin_attr_fail() {
+    S::memrealtime_lambda();
+}
+
+constexpr auto memrealtime_lambda = []() {
+    __builtin_amdgcn_s_memrealtime(); // no-memrealtime-error{{'__builtin_amdgcn_s_memrealtime' needs target feature s-memrealtime}}
+};
+
+__attribute__((target("s-memrealtime")))
+__device__ void global_test_target_dependant_builtin_attr_fail() {
+    memrealtime_lambda();
+}
+
+__attribute__((target("s-memrealtime")))
+__device__ void local_test_target_dependant_builtin_attr_fail() {
+    static constexpr auto f = []() {
+        __builtin_amdgcn_s_memrealtime(); // no-memrealtime-error{{'__builtin_amdgcn_s_memrealtime' needs target feature s-memrealtime}}
+    };
+    f();
+}

diff  --git a/clang/test/SemaHIP/amdgpu-builtin-in-lambda.hip b/clang/test/SemaHIP/amdgpu-builtin-in-lambda.hip
new file mode 100644
index 0000000000000..8f0b14b7379d2
--- /dev/null
+++ b/clang/test/SemaHIP/amdgpu-builtin-in-lambda.hip
@@ -0,0 +1,53 @@
+// RUN: %clang_cc1 -std=c++20 -triple amdgcn -target-cpu gfx90a -fsyntax-only -fcuda-is-device -verify=gfx90a -o - %s
+// RUN: %clang_cc1 -std=c++20 -triple amdgcn -target-cpu gfx950 -fsyntax-only -fcuda-is-device -o - %s
+
+#define __device__ __attribute__((device))
+#define __shared__ __attribute__((shared))
+
+struct S {
+    static constexpr auto make_buffer_rsrc_lambda = [](void *p, short stride, int num, int flags) {
+        return __builtin_amdgcn_make_buffer_rsrc(p, stride, num, flags);
+    };
+
+    static constexpr auto global_load_lds_lambda = [](void* src, __shared__ void *dst) {
+        __builtin_amdgcn_global_load_lds(src, dst, 16, 0, 0); // gfx90a-error{{invalid size value}} gfx90a-note{{size must be 1, 2, or 4}}
+    };
+};
+
+__device__ __amdgpu_buffer_rsrc_t test_simple_builtin(void *p, short stride, int num, int flags) {
+    return S::make_buffer_rsrc_lambda(p, stride, num, flags);
+}
+
+__device__ void test_target_dependant_builtin(void *src, __shared__ void *dst) {
+    S::global_load_lds_lambda(src, dst);
+}
+
+constexpr auto make_buffer_rsrc_lambda = [](void *p, short stride, int num, int flags) {
+    return __builtin_amdgcn_make_buffer_rsrc(p, stride, num, flags);
+};
+
+constexpr auto global_load_lds_lambda = [](void* src, __shared__ void *dst) {
+    __builtin_amdgcn_global_load_lds(src, dst, 16, 0, 0); // gfx90a-error{{invalid size value}} gfx90a-note{{size must be 1, 2, or 4}}
+};
+
+__device__ __amdgpu_buffer_rsrc_t global_test_simple_builtin(void *p, short stride, int num, int flags) {
+    return make_buffer_rsrc_lambda(p, stride, num, flags);
+}
+
+__device__ void global_test_target_dependant_builtin(void *src, __shared__ void *dst) {
+    global_load_lds_lambda(src, dst);
+}
+
+__device__ __amdgpu_buffer_rsrc_t local_test_simple_builtin(void *p, short stride, int num, int flags) {
+    constexpr auto f = [](void *p, short stride, int num, int flags) {
+        return __builtin_amdgcn_make_buffer_rsrc(p, stride, num, flags);
+    };
+    return f(p, stride, num, flags);
+}
+
+__device__ void local_test_target_dependant_builtin(void *src, __shared__ void *dst) {
+    constexpr auto f = [](void* src, __shared__ void *dst) {
+        __builtin_amdgcn_global_load_lds(src, dst, 16, 0, 0); // gfx90a-error{{invalid size value}} gfx90a-note{{size must be 1, 2, or 4}}
+    };
+    f(src, dst);
+}