[clang] [Clang][AMDGPU] Defer amdgcn.*to_lds builtins size diagnostics (PR #160140)

Juan Manuel Martinez CaamaƱo via cfe-commits cfe-commits at lists.llvm.org
Mon Sep 22 09:04:45 PDT 2025


https://github.com/jmmartinez created https://github.com/llvm/llvm-project/pull/160140

Before, the diagnostic was emitted immediately, as soon as the error was
detected. This is problematic during the host compilation, since
the compiler performs semantic analysis of `__device__` functions with the host's
target attributes.

A solution for this is to use `SemaRef.targetDiag` to defer the
diagnostic. The diagnostic will then be printed only if the function is
emitted.

The test included in this patch highlights a second problem: we cannot compile a file having a call to `__builtin_amdgcn_load_to_lds` on a `__device__` function since we typecheck the signature.
The issue is that, `__shared__ void*` on X86 doesn't translate to `addrspace(3) void*`, so the compilation fails.

I was thinking about doing the same (deferring the diagnostic) for the builtin signature; or adding an attribute to indicate that the type checking for the builtin is deferred using `targetDiag`.

>From 3605562dbbcf6b372f92aac53d8398e7557bc045 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Juan=20Manuel=20Martinez=20Caama=C3=B1o?= <juamarti at amd.com>
Date: Mon, 22 Sep 2025 13:46:08 +0200
Subject: [PATCH 1/3] Pre-commit test for: [Clang][AMDGPU] Defer amdgcn.*to_lds
 builtins size diagnostics

---
 .../SemaHIP/amdgpu-gfx950-load-to-lds.hip     | 60 +++++++++++++++++++
 1 file changed, 60 insertions(+)
 create mode 100644 clang/test/SemaHIP/amdgpu-gfx950-load-to-lds.hip

diff --git a/clang/test/SemaHIP/amdgpu-gfx950-load-to-lds.hip b/clang/test/SemaHIP/amdgpu-gfx950-load-to-lds.hip
new file mode 100644
index 0000000000000..d5e781266e8d4
--- /dev/null
+++ b/clang/test/SemaHIP/amdgpu-gfx950-load-to-lds.hip
@@ -0,0 +1,60 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -fsyntax-only -triple amdgcn -target-cpu gfx950 -verify=device %s -fcuda-is-device
+// RUN: %clang_cc1 -fsyntax-only -triple x86_64 -aux-triple amdgcn -verify=host %s
+// device-no-diagnostics
+
+#define __device__ __attribute__((device))
+#define __global__ __attribute__((global))
+#define __shared__ __attribute__((shared)) 
+
+__device__ void i_am_device(void* src, __amdgpu_buffer_rsrc_t rsrc, __shared__ void* dst, int vindex, int voffset, int soffset) {
+    __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, dst, 1, voffset, soffset, 0, 0);
+    __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, dst, 2, voffset, soffset, 0, 0);
+    __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, dst, 4, voffset, soffset, 0, 0);
+    __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, dst, 12, voffset, soffset, 0, 0); // host-error{{invalid size value}} host-note{{size must be 1, 2, or 4}}
+    __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, dst, 16, voffset, soffset, 0, 0); // host-error{{invalid size value}} host-note{{size must be 1, 2, or 4}}
+
+    __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, dst, 1, vindex, voffset, soffset, 0, 0);
+    __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, dst, 2, vindex, voffset, soffset, 0, 0);
+    __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, dst, 4, vindex, voffset, soffset, 0, 0);
+    __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, dst, 12, vindex, voffset, soffset, 0, 0); // host-error{{invalid size value}} host-note{{size must be 1, 2, or 4}}
+    __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, dst, 16, vindex, voffset, soffset, 0, 0); // host-error{{invalid size value}} host-note{{size must be 1, 2, or 4}}
+
+    __builtin_amdgcn_load_to_lds(src, dst, 1, 0, 0); // host-error{{cannot initialize a parameter of type '__attribute__((address_space(3))) void *' with an lvalue of type 'void *'}}
+    __builtin_amdgcn_load_to_lds(src, dst, 2, 0, 0); // host-error{{cannot initialize a parameter of type '__attribute__((address_space(3))) void *' with an lvalue of type 'void *'}}
+    __builtin_amdgcn_load_to_lds(src, dst, 4, 0, 0); // host-error{{cannot initialize a parameter of type '__attribute__((address_space(3))) void *' with an lvalue of type 'void *'}}
+    __builtin_amdgcn_load_to_lds(src, dst, 12, 0, 0); // host-error{{cannot initialize a parameter of type '__attribute__((address_space(3))) void *' with an lvalue of type 'void *'}}
+    __builtin_amdgcn_load_to_lds(src, dst, 16, 0, 0); // host-error{{cannot initialize a parameter of type '__attribute__((address_space(3))) void *' with an lvalue of type 'void *'}}
+
+    __builtin_amdgcn_global_load_lds(src, dst, 1, 0 , 0);
+    __builtin_amdgcn_global_load_lds(src, dst, 2, 0 , 0);
+    __builtin_amdgcn_global_load_lds(src, dst, 4, 0 , 0);
+    __builtin_amdgcn_global_load_lds(src, dst, 12, 0 , 0); // host-error{{invalid size value}} host-note{{size must be 1, 2, or 4}}
+    __builtin_amdgcn_global_load_lds(src, dst, 16, 0 , 0); // host-error{{invalid size value}} host-note{{size must be 1, 2, or 4}}
+}
+
+__global__ void i_am_kernel(void* src, __amdgpu_buffer_rsrc_t rsrc, __shared__ void* dst, int vindex, int voffset, int soffset) {
+    __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, dst, 1, voffset, soffset, 0, 0);
+    __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, dst, 2, voffset, soffset, 0, 0);
+    __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, dst, 4, voffset, soffset, 0, 0);
+    __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, dst, 12, voffset, soffset, 0, 0); // host-error{{invalid size value}} host-note{{size must be 1, 2, or 4}}
+    __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, dst, 16, voffset, soffset, 0, 0); // host-error{{invalid size value}} host-note{{size must be 1, 2, or 4}}
+
+    __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, dst, 1, vindex, voffset, soffset, 0, 0);
+    __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, dst, 2, vindex, voffset, soffset, 0, 0);
+    __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, dst, 4, vindex, voffset, soffset, 0, 0);
+    __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, dst, 12, vindex, voffset, soffset, 0, 0); // host-error{{invalid size value}} host-note{{size must be 1, 2, or 4}}
+    __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, dst, 16, vindex, voffset, soffset, 0, 0); // host-error{{invalid size value}} host-note{{size must be 1, 2, or 4}}
+
+    __builtin_amdgcn_load_to_lds(src, dst, 1, 0, 0); // host-error{{cannot initialize a parameter of type '__attribute__((address_space(3))) void *' with an lvalue of type 'void *'}}
+    __builtin_amdgcn_load_to_lds(src, dst, 2, 0, 0); // host-error{{cannot initialize a parameter of type '__attribute__((address_space(3))) void *' with an lvalue of type 'void *'}}
+    __builtin_amdgcn_load_to_lds(src, dst, 4, 0, 0); // host-error{{cannot initialize a parameter of type '__attribute__((address_space(3))) void *' with an lvalue of type 'void *'}}
+    __builtin_amdgcn_load_to_lds(src, dst, 12, 0, 0); // host-error{{cannot initialize a parameter of type '__attribute__((address_space(3))) void *' with an lvalue of type 'void *'}}
+    __builtin_amdgcn_load_to_lds(src, dst, 16, 0, 0); // host-error{{cannot initialize a parameter of type '__attribute__((address_space(3))) void *' with an lvalue of type 'void *'}}
+
+    __builtin_amdgcn_global_load_lds(src, dst, 1, 0 , 0);
+    __builtin_amdgcn_global_load_lds(src, dst, 2, 0 , 0);
+    __builtin_amdgcn_global_load_lds(src, dst, 4, 0 , 0);
+    __builtin_amdgcn_global_load_lds(src, dst, 12, 0 , 0); // host-error{{invalid size value}} host-note{{size must be 1, 2, or 4}}
+    __builtin_amdgcn_global_load_lds(src, dst, 16, 0 , 0); // host-error{{invalid size value}} host-note{{size must be 1, 2, or 4}}
+}

>From e687b653dfb9c66b88861e0f4eca85a4b1193fa7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Juan=20Manuel=20Martinez=20Caama=C3=B1o?= <juamarti at amd.com>
Date: Mon, 22 Sep 2025 14:54:41 +0200
Subject: [PATCH 2/3] [Clang][AMDGPU] Defer amdgcn.*to_lds builtins size
 diagnostics

Before, the diagnostic was emitted immediately, as soon as the error was
detected.
This is problematic during the host compilation, since
the compiler performs semantic analysis of __device__ functions with the host's
target attributes.

A solution for this is to use `SemaRef.targetDiag` to defer the
diagnostic. The diagnostic will then be printed only if the function is
emitted.
---
 clang/lib/Sema/SemaAMDGPU.cpp                 |  6 +++--
 .../SemaHIP/amdgpu-gfx950-load-to-lds.hip     | 24 +++++++++----------
 2 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/clang/lib/Sema/SemaAMDGPU.cpp b/clang/lib/Sema/SemaAMDGPU.cpp
index baba503239e9f..916bddc9040ea 100644
--- a/clang/lib/Sema/SemaAMDGPU.cpp
+++ b/clang/lib/Sema/SemaAMDGPU.cpp
@@ -58,9 +58,11 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
       [[fallthrough]];
     }
     default:
-      Diag(ArgExpr->getExprLoc(), diag::err_amdgcn_load_lds_size_invalid_value)
+      SemaRef.targetDiag(ArgExpr->getExprLoc(),
+                         diag::err_amdgcn_load_lds_size_invalid_value)
           << ArgExpr->getSourceRange();
-      Diag(ArgExpr->getExprLoc(), diag::note_amdgcn_load_lds_size_valid_value)
+      SemaRef.targetDiag(ArgExpr->getExprLoc(),
+                         diag::note_amdgcn_load_lds_size_valid_value)
           << HasGFX950Insts << ArgExpr->getSourceRange();
       return true;
     }
diff --git a/clang/test/SemaHIP/amdgpu-gfx950-load-to-lds.hip b/clang/test/SemaHIP/amdgpu-gfx950-load-to-lds.hip
index d5e781266e8d4..366278f648939 100644
--- a/clang/test/SemaHIP/amdgpu-gfx950-load-to-lds.hip
+++ b/clang/test/SemaHIP/amdgpu-gfx950-load-to-lds.hip
@@ -11,14 +11,14 @@ __device__ void i_am_device(void* src, __amdgpu_buffer_rsrc_t rsrc, __shared__ v
     __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, dst, 1, voffset, soffset, 0, 0);
     __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, dst, 2, voffset, soffset, 0, 0);
     __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, dst, 4, voffset, soffset, 0, 0);
-    __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, dst, 12, voffset, soffset, 0, 0); // host-error{{invalid size value}} host-note{{size must be 1, 2, or 4}}
-    __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, dst, 16, voffset, soffset, 0, 0); // host-error{{invalid size value}} host-note{{size must be 1, 2, or 4}}
+    __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, dst, 12, voffset, soffset, 0, 0);
+    __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, dst, 16, voffset, soffset, 0, 0);
 
     __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, dst, 1, vindex, voffset, soffset, 0, 0);
     __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, dst, 2, vindex, voffset, soffset, 0, 0);
     __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, dst, 4, vindex, voffset, soffset, 0, 0);
-    __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, dst, 12, vindex, voffset, soffset, 0, 0); // host-error{{invalid size value}} host-note{{size must be 1, 2, or 4}}
-    __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, dst, 16, vindex, voffset, soffset, 0, 0); // host-error{{invalid size value}} host-note{{size must be 1, 2, or 4}}
+    __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, dst, 12, vindex, voffset, soffset, 0, 0);
+    __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, dst, 16, vindex, voffset, soffset, 0, 0);
 
     __builtin_amdgcn_load_to_lds(src, dst, 1, 0, 0); // host-error{{cannot initialize a parameter of type '__attribute__((address_space(3))) void *' with an lvalue of type 'void *'}}
     __builtin_amdgcn_load_to_lds(src, dst, 2, 0, 0); // host-error{{cannot initialize a parameter of type '__attribute__((address_space(3))) void *' with an lvalue of type 'void *'}}
@@ -29,22 +29,22 @@ __device__ void i_am_device(void* src, __amdgpu_buffer_rsrc_t rsrc, __shared__ v
     __builtin_amdgcn_global_load_lds(src, dst, 1, 0 , 0);
     __builtin_amdgcn_global_load_lds(src, dst, 2, 0 , 0);
     __builtin_amdgcn_global_load_lds(src, dst, 4, 0 , 0);
-    __builtin_amdgcn_global_load_lds(src, dst, 12, 0 , 0); // host-error{{invalid size value}} host-note{{size must be 1, 2, or 4}}
-    __builtin_amdgcn_global_load_lds(src, dst, 16, 0 , 0); // host-error{{invalid size value}} host-note{{size must be 1, 2, or 4}}
+    __builtin_amdgcn_global_load_lds(src, dst, 12, 0 , 0);
+    __builtin_amdgcn_global_load_lds(src, dst, 16, 0 , 0);
 }
 
 __global__ void i_am_kernel(void* src, __amdgpu_buffer_rsrc_t rsrc, __shared__ void* dst, int vindex, int voffset, int soffset) {
     __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, dst, 1, voffset, soffset, 0, 0);
     __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, dst, 2, voffset, soffset, 0, 0);
     __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, dst, 4, voffset, soffset, 0, 0);
-    __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, dst, 12, voffset, soffset, 0, 0); // host-error{{invalid size value}} host-note{{size must be 1, 2, or 4}}
-    __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, dst, 16, voffset, soffset, 0, 0); // host-error{{invalid size value}} host-note{{size must be 1, 2, or 4}}
+    __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, dst, 12, voffset, soffset, 0, 0);
+    __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, dst, 16, voffset, soffset, 0, 0);
 
     __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, dst, 1, vindex, voffset, soffset, 0, 0);
     __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, dst, 2, vindex, voffset, soffset, 0, 0);
     __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, dst, 4, vindex, voffset, soffset, 0, 0);
-    __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, dst, 12, vindex, voffset, soffset, 0, 0); // host-error{{invalid size value}} host-note{{size must be 1, 2, or 4}}
-    __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, dst, 16, vindex, voffset, soffset, 0, 0); // host-error{{invalid size value}} host-note{{size must be 1, 2, or 4}}
+    __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, dst, 12, vindex, voffset, soffset, 0, 0);
+    __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, dst, 16, vindex, voffset, soffset, 0, 0);
 
     __builtin_amdgcn_load_to_lds(src, dst, 1, 0, 0); // host-error{{cannot initialize a parameter of type '__attribute__((address_space(3))) void *' with an lvalue of type 'void *'}}
     __builtin_amdgcn_load_to_lds(src, dst, 2, 0, 0); // host-error{{cannot initialize a parameter of type '__attribute__((address_space(3))) void *' with an lvalue of type 'void *'}}
@@ -55,6 +55,6 @@ __global__ void i_am_kernel(void* src, __amdgpu_buffer_rsrc_t rsrc, __shared__ v
     __builtin_amdgcn_global_load_lds(src, dst, 1, 0 , 0);
     __builtin_amdgcn_global_load_lds(src, dst, 2, 0 , 0);
     __builtin_amdgcn_global_load_lds(src, dst, 4, 0 , 0);
-    __builtin_amdgcn_global_load_lds(src, dst, 12, 0 , 0); // host-error{{invalid size value}} host-note{{size must be 1, 2, or 4}}
-    __builtin_amdgcn_global_load_lds(src, dst, 16, 0 , 0); // host-error{{invalid size value}} host-note{{size must be 1, 2, or 4}}
+    __builtin_amdgcn_global_load_lds(src, dst, 12, 0 , 0);
+    __builtin_amdgcn_global_load_lds(src, dst, 16, 0 , 0);
 }

>From 4d97ebb6927cd3e411818a084b23fe3a8733ad82 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Juan=20Manuel=20Martinez=20Caama=C3=B1o?= <juamarti at amd.com>
Date: Mon, 22 Sep 2025 16:55:35 +0200
Subject: [PATCH 3/3] [Clang][AMDGPU] Update amdgpu-builtin-in-lambda.hip

With previous commit we defer the diagnostics.

Lambda's are both __host__ and __device__ functions by default. And
their diagnostics get deferred also when -fcuda-is-device is used.

In this case, the diagnostics with the error are printed with a note
indicating the callstack. Sadly, this note overshadows the
error, and the note with the valid sizes gets lost (AFAIK notes are not
printed if the previous diagnostic is not a warning or an error).
---
 clang/test/SemaHIP/amdgpu-builtin-in-lambda.hip | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/clang/test/SemaHIP/amdgpu-builtin-in-lambda.hip b/clang/test/SemaHIP/amdgpu-builtin-in-lambda.hip
index 8f0b14b7379d2..f89fc7b971e16 100644
--- a/clang/test/SemaHIP/amdgpu-builtin-in-lambda.hip
+++ b/clang/test/SemaHIP/amdgpu-builtin-in-lambda.hip
@@ -10,7 +10,7 @@ struct S {
     };
 
     static constexpr auto global_load_lds_lambda = [](void* src, __shared__ void *dst) {
-        __builtin_amdgcn_global_load_lds(src, dst, 16, 0, 0); // gfx90a-error{{invalid size value}} gfx90a-note{{size must be 1, 2, or 4}}
+        __builtin_amdgcn_global_load_lds(src, dst, 16, 0, 0); // gfx90a-error{{invalid size value}}
     };
 };
 
@@ -19,7 +19,7 @@ __device__ __amdgpu_buffer_rsrc_t test_simple_builtin(void *p, short stride, int
 }
 
 __device__ void test_target_dependant_builtin(void *src, __shared__ void *dst) {
-    S::global_load_lds_lambda(src, dst);
+    S::global_load_lds_lambda(src, dst); // gfx90a-note{{called by 'test_target_dependant_builtin'}}
 }
 
 constexpr auto make_buffer_rsrc_lambda = [](void *p, short stride, int num, int flags) {
@@ -27,7 +27,7 @@ constexpr auto make_buffer_rsrc_lambda = [](void *p, short stride, int num, int
 };
 
 constexpr auto global_load_lds_lambda = [](void* src, __shared__ void *dst) {
-    __builtin_amdgcn_global_load_lds(src, dst, 16, 0, 0); // gfx90a-error{{invalid size value}} gfx90a-note{{size must be 1, 2, or 4}}
+    __builtin_amdgcn_global_load_lds(src, dst, 16, 0, 0); // gfx90a-error{{invalid size value}}
 };
 
 __device__ __amdgpu_buffer_rsrc_t global_test_simple_builtin(void *p, short stride, int num, int flags) {
@@ -35,7 +35,7 @@ __device__ __amdgpu_buffer_rsrc_t global_test_simple_builtin(void *p, short stri
 }
 
 __device__ void global_test_target_dependant_builtin(void *src, __shared__ void *dst) {
-    global_load_lds_lambda(src, dst);
+    global_load_lds_lambda(src, dst); // gfx90a-note{{called by 'global_test_target_dependant_builtin'}}
 }
 
 __device__ __amdgpu_buffer_rsrc_t local_test_simple_builtin(void *p, short stride, int num, int flags) {
@@ -47,7 +47,7 @@ __device__ __amdgpu_buffer_rsrc_t local_test_simple_builtin(void *p, short strid
 
 __device__ void local_test_target_dependant_builtin(void *src, __shared__ void *dst) {
     constexpr auto f = [](void* src, __shared__ void *dst) {
-        __builtin_amdgcn_global_load_lds(src, dst, 16, 0, 0); // gfx90a-error{{invalid size value}} gfx90a-note{{size must be 1, 2, or 4}}
+        __builtin_amdgcn_global_load_lds(src, dst, 16, 0, 0); // gfx90a-error{{invalid size value}}
     };
-    f(src, dst);
+    f(src, dst); // gfx90a-note{{called by 'local_test_target_dependant_builtin'}}
 }



More information about the cfe-commits mailing list