[clang] [Clang][AMDGPU] Defer amdgcn.*to_lds builtins size diagnostics (PR #160140)

via cfe-commits cfe-commits at lists.llvm.org
Mon Sep 22 09:05:25 PDT 2025


Juan Manuel Martinez =?utf-8?q?Caamaño?= <juamarti at amd.com>,
Juan Manuel Martinez =?utf-8?q?Caamaño?= <juamarti at amd.com>
Message-ID:
In-Reply-To: <llvm.org/llvm/llvm-project/pull/160140 at github.com>


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang

Author: Juan Manuel Martinez Caamaño (jmmartinez)

<details>
<summary>Changes</summary>

Before, the diagnostic was emitted immediately, as soon as the error was
detected. This is problematic during the host compilation, since
the compiler performs semantic analysis of `__device__` functions with the host's
target attributes.

A solution for this is to use `SemaRef.targetDiag` to defer the
diagnostic. The diagnostic will then be printed only if the function is
emitted.

The test included in this patch highlights a second problem: we cannot compile a file having a call to `__builtin_amdgcn_load_to_lds` on a `__device__` function since we typecheck the signature.
The issue is that, `__shared__ void*` on X86 doesn't translate to `addrspace(3) void*`, so the compilation fails.

I was thinking about doing the same (deferring the diagnostic) for the builtin signature; or adding an attribute to indicate that the type checking for the builtin is deferred using `targetDiag`.

---
Full diff: https://github.com/llvm/llvm-project/pull/160140.diff


3 Files Affected:

- (modified) clang/lib/Sema/SemaAMDGPU.cpp (+4-2) 
- (modified) clang/test/SemaHIP/amdgpu-builtin-in-lambda.hip (+6-6) 
- (added) clang/test/SemaHIP/amdgpu-gfx950-load-to-lds.hip (+60) 


``````````diff
diff --git a/clang/lib/Sema/SemaAMDGPU.cpp b/clang/lib/Sema/SemaAMDGPU.cpp
index baba503239e9f..916bddc9040ea 100644
--- a/clang/lib/Sema/SemaAMDGPU.cpp
+++ b/clang/lib/Sema/SemaAMDGPU.cpp
@@ -58,9 +58,11 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
       [[fallthrough]];
     }
     default:
-      Diag(ArgExpr->getExprLoc(), diag::err_amdgcn_load_lds_size_invalid_value)
+      SemaRef.targetDiag(ArgExpr->getExprLoc(),
+                         diag::err_amdgcn_load_lds_size_invalid_value)
           << ArgExpr->getSourceRange();
-      Diag(ArgExpr->getExprLoc(), diag::note_amdgcn_load_lds_size_valid_value)
+      SemaRef.targetDiag(ArgExpr->getExprLoc(),
+                         diag::note_amdgcn_load_lds_size_valid_value)
           << HasGFX950Insts << ArgExpr->getSourceRange();
       return true;
     }
diff --git a/clang/test/SemaHIP/amdgpu-builtin-in-lambda.hip b/clang/test/SemaHIP/amdgpu-builtin-in-lambda.hip
index 8f0b14b7379d2..f89fc7b971e16 100644
--- a/clang/test/SemaHIP/amdgpu-builtin-in-lambda.hip
+++ b/clang/test/SemaHIP/amdgpu-builtin-in-lambda.hip
@@ -10,7 +10,7 @@ struct S {
     };
 
     static constexpr auto global_load_lds_lambda = [](void* src, __shared__ void *dst) {
-        __builtin_amdgcn_global_load_lds(src, dst, 16, 0, 0); // gfx90a-error{{invalid size value}} gfx90a-note{{size must be 1, 2, or 4}}
+        __builtin_amdgcn_global_load_lds(src, dst, 16, 0, 0); // gfx90a-error{{invalid size value}}
     };
 };
 
@@ -19,7 +19,7 @@ __device__ __amdgpu_buffer_rsrc_t test_simple_builtin(void *p, short stride, int
 }
 
 __device__ void test_target_dependant_builtin(void *src, __shared__ void *dst) {
-    S::global_load_lds_lambda(src, dst);
+    S::global_load_lds_lambda(src, dst); // gfx90a-note{{called by 'test_target_dependant_builtin'}}
 }
 
 constexpr auto make_buffer_rsrc_lambda = [](void *p, short stride, int num, int flags) {
@@ -27,7 +27,7 @@ constexpr auto make_buffer_rsrc_lambda = [](void *p, short stride, int num, int
 };
 
 constexpr auto global_load_lds_lambda = [](void* src, __shared__ void *dst) {
-    __builtin_amdgcn_global_load_lds(src, dst, 16, 0, 0); // gfx90a-error{{invalid size value}} gfx90a-note{{size must be 1, 2, or 4}}
+    __builtin_amdgcn_global_load_lds(src, dst, 16, 0, 0); // gfx90a-error{{invalid size value}}
 };
 
 __device__ __amdgpu_buffer_rsrc_t global_test_simple_builtin(void *p, short stride, int num, int flags) {
@@ -35,7 +35,7 @@ __device__ __amdgpu_buffer_rsrc_t global_test_simple_builtin(void *p, short stri
 }
 
 __device__ void global_test_target_dependant_builtin(void *src, __shared__ void *dst) {
-    global_load_lds_lambda(src, dst);
+    global_load_lds_lambda(src, dst); // gfx90a-note{{called by 'global_test_target_dependant_builtin'}}
 }
 
 __device__ __amdgpu_buffer_rsrc_t local_test_simple_builtin(void *p, short stride, int num, int flags) {
@@ -47,7 +47,7 @@ __device__ __amdgpu_buffer_rsrc_t local_test_simple_builtin(void *p, short strid
 
 __device__ void local_test_target_dependant_builtin(void *src, __shared__ void *dst) {
     constexpr auto f = [](void* src, __shared__ void *dst) {
-        __builtin_amdgcn_global_load_lds(src, dst, 16, 0, 0); // gfx90a-error{{invalid size value}} gfx90a-note{{size must be 1, 2, or 4}}
+        __builtin_amdgcn_global_load_lds(src, dst, 16, 0, 0); // gfx90a-error{{invalid size value}}
     };
-    f(src, dst);
+    f(src, dst); // gfx90a-note{{called by 'local_test_target_dependant_builtin'}}
 }
diff --git a/clang/test/SemaHIP/amdgpu-gfx950-load-to-lds.hip b/clang/test/SemaHIP/amdgpu-gfx950-load-to-lds.hip
new file mode 100644
index 0000000000000..366278f648939
--- /dev/null
+++ b/clang/test/SemaHIP/amdgpu-gfx950-load-to-lds.hip
@@ -0,0 +1,60 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -fsyntax-only -triple amdgcn -target-cpu gfx950 -verify=device %s -fcuda-is-device
+// RUN: %clang_cc1 -fsyntax-only -triple x86_64 -aux-triple amdgcn -verify=host %s
+// device-no-diagnostics
+
+#define __device__ __attribute__((device))
+#define __global__ __attribute__((global))
+#define __shared__ __attribute__((shared)) 
+
+__device__ void i_am_device(void* src, __amdgpu_buffer_rsrc_t rsrc, __shared__ void* dst, int vindex, int voffset, int soffset) {
+    __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, dst, 1, voffset, soffset, 0, 0);
+    __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, dst, 2, voffset, soffset, 0, 0);
+    __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, dst, 4, voffset, soffset, 0, 0);
+    __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, dst, 12, voffset, soffset, 0, 0);
+    __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, dst, 16, voffset, soffset, 0, 0);
+
+    __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, dst, 1, vindex, voffset, soffset, 0, 0);
+    __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, dst, 2, vindex, voffset, soffset, 0, 0);
+    __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, dst, 4, vindex, voffset, soffset, 0, 0);
+    __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, dst, 12, vindex, voffset, soffset, 0, 0);
+    __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, dst, 16, vindex, voffset, soffset, 0, 0);
+
+    __builtin_amdgcn_load_to_lds(src, dst, 1, 0, 0); // host-error{{cannot initialize a parameter of type '__attribute__((address_space(3))) void *' with an lvalue of type 'void *'}}
+    __builtin_amdgcn_load_to_lds(src, dst, 2, 0, 0); // host-error{{cannot initialize a parameter of type '__attribute__((address_space(3))) void *' with an lvalue of type 'void *'}}
+    __builtin_amdgcn_load_to_lds(src, dst, 4, 0, 0); // host-error{{cannot initialize a parameter of type '__attribute__((address_space(3))) void *' with an lvalue of type 'void *'}}
+    __builtin_amdgcn_load_to_lds(src, dst, 12, 0, 0); // host-error{{cannot initialize a parameter of type '__attribute__((address_space(3))) void *' with an lvalue of type 'void *'}}
+    __builtin_amdgcn_load_to_lds(src, dst, 16, 0, 0); // host-error{{cannot initialize a parameter of type '__attribute__((address_space(3))) void *' with an lvalue of type 'void *'}}
+
+    __builtin_amdgcn_global_load_lds(src, dst, 1, 0 , 0);
+    __builtin_amdgcn_global_load_lds(src, dst, 2, 0 , 0);
+    __builtin_amdgcn_global_load_lds(src, dst, 4, 0 , 0);
+    __builtin_amdgcn_global_load_lds(src, dst, 12, 0 , 0);
+    __builtin_amdgcn_global_load_lds(src, dst, 16, 0 , 0);
+}
+
+__global__ void i_am_kernel(void* src, __amdgpu_buffer_rsrc_t rsrc, __shared__ void* dst, int vindex, int voffset, int soffset) {
+    __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, dst, 1, voffset, soffset, 0, 0);
+    __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, dst, 2, voffset, soffset, 0, 0);
+    __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, dst, 4, voffset, soffset, 0, 0);
+    __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, dst, 12, voffset, soffset, 0, 0);
+    __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, dst, 16, voffset, soffset, 0, 0);
+
+    __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, dst, 1, vindex, voffset, soffset, 0, 0);
+    __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, dst, 2, vindex, voffset, soffset, 0, 0);
+    __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, dst, 4, vindex, voffset, soffset, 0, 0);
+    __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, dst, 12, vindex, voffset, soffset, 0, 0);
+    __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, dst, 16, vindex, voffset, soffset, 0, 0);
+
+    __builtin_amdgcn_load_to_lds(src, dst, 1, 0, 0); // host-error{{cannot initialize a parameter of type '__attribute__((address_space(3))) void *' with an lvalue of type 'void *'}}
+    __builtin_amdgcn_load_to_lds(src, dst, 2, 0, 0); // host-error{{cannot initialize a parameter of type '__attribute__((address_space(3))) void *' with an lvalue of type 'void *'}}
+    __builtin_amdgcn_load_to_lds(src, dst, 4, 0, 0); // host-error{{cannot initialize a parameter of type '__attribute__((address_space(3))) void *' with an lvalue of type 'void *'}}
+    __builtin_amdgcn_load_to_lds(src, dst, 12, 0, 0); // host-error{{cannot initialize a parameter of type '__attribute__((address_space(3))) void *' with an lvalue of type 'void *'}}
+    __builtin_amdgcn_load_to_lds(src, dst, 16, 0, 0); // host-error{{cannot initialize a parameter of type '__attribute__((address_space(3))) void *' with an lvalue of type 'void *'}}
+
+    __builtin_amdgcn_global_load_lds(src, dst, 1, 0 , 0);
+    __builtin_amdgcn_global_load_lds(src, dst, 2, 0 , 0);
+    __builtin_amdgcn_global_load_lds(src, dst, 4, 0 , 0);
+    __builtin_amdgcn_global_load_lds(src, dst, 12, 0 , 0);
+    __builtin_amdgcn_global_load_lds(src, dst, 16, 0 , 0);
+}

``````````

</details>


https://github.com/llvm/llvm-project/pull/160140


More information about the cfe-commits mailing list