[clang] [Clang][AMDGPU] Defer amdgcn.*to_lds builtins size diagnostics (PR #160140)
via cfe-commits
cfe-commits at lists.llvm.org
Mon Sep 22 09:05:25 PDT 2025
Juan Manuel Martinez =?utf-8?q?Caamaño?= <juamarti at amd.com>,
Juan Manuel Martinez =?utf-8?q?Caamaño?= <juamarti at amd.com>
Message-ID:
In-Reply-To: <llvm.org/llvm/llvm-project/pull/160140 at github.com>
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang
Author: Juan Manuel Martinez Caamaño (jmmartinez)
<details>
<summary>Changes</summary>
Before, the diagnostic was emitted immediately, as soon as the error was
detected. This is problematic during the host compilation, since
the compiler performs semantic analysis of `__device__` functions with the host's
target attributes.
A solution for this is to use `SemaRef.targetDiag` to defer the
diagnostic. The diagnostic will then be printed only if the function is
emitted.
The test included in this patch highlights a second problem: we cannot compile a file having a call to `__builtin_amdgcn_load_to_lds` on a `__device__` function since we typecheck the signature.
The issue is that, `__shared__ void*` on X86 doesn't translate to `addrspace(3) void*`, so the compilation fails.
I was thinking about doing the same (deferring the diagnostic) for the builtin signature; or adding an attribute to indicate that the type checking for the builtin is deferred using `targetDiag`.
---
Full diff: https://github.com/llvm/llvm-project/pull/160140.diff
3 Files Affected:
- (modified) clang/lib/Sema/SemaAMDGPU.cpp (+4-2)
- (modified) clang/test/SemaHIP/amdgpu-builtin-in-lambda.hip (+6-6)
- (added) clang/test/SemaHIP/amdgpu-gfx950-load-to-lds.hip (+60)
``````````diff
diff --git a/clang/lib/Sema/SemaAMDGPU.cpp b/clang/lib/Sema/SemaAMDGPU.cpp
index baba503239e9f..916bddc9040ea 100644
--- a/clang/lib/Sema/SemaAMDGPU.cpp
+++ b/clang/lib/Sema/SemaAMDGPU.cpp
@@ -58,9 +58,11 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
[[fallthrough]];
}
default:
- Diag(ArgExpr->getExprLoc(), diag::err_amdgcn_load_lds_size_invalid_value)
+ SemaRef.targetDiag(ArgExpr->getExprLoc(),
+ diag::err_amdgcn_load_lds_size_invalid_value)
<< ArgExpr->getSourceRange();
- Diag(ArgExpr->getExprLoc(), diag::note_amdgcn_load_lds_size_valid_value)
+ SemaRef.targetDiag(ArgExpr->getExprLoc(),
+ diag::note_amdgcn_load_lds_size_valid_value)
<< HasGFX950Insts << ArgExpr->getSourceRange();
return true;
}
diff --git a/clang/test/SemaHIP/amdgpu-builtin-in-lambda.hip b/clang/test/SemaHIP/amdgpu-builtin-in-lambda.hip
index 8f0b14b7379d2..f89fc7b971e16 100644
--- a/clang/test/SemaHIP/amdgpu-builtin-in-lambda.hip
+++ b/clang/test/SemaHIP/amdgpu-builtin-in-lambda.hip
@@ -10,7 +10,7 @@ struct S {
};
static constexpr auto global_load_lds_lambda = [](void* src, __shared__ void *dst) {
- __builtin_amdgcn_global_load_lds(src, dst, 16, 0, 0); // gfx90a-error{{invalid size value}} gfx90a-note{{size must be 1, 2, or 4}}
+ __builtin_amdgcn_global_load_lds(src, dst, 16, 0, 0); // gfx90a-error{{invalid size value}}
};
};
@@ -19,7 +19,7 @@ __device__ __amdgpu_buffer_rsrc_t test_simple_builtin(void *p, short stride, int
}
__device__ void test_target_dependant_builtin(void *src, __shared__ void *dst) {
- S::global_load_lds_lambda(src, dst);
+ S::global_load_lds_lambda(src, dst); // gfx90a-note{{called by 'test_target_dependant_builtin'}}
}
constexpr auto make_buffer_rsrc_lambda = [](void *p, short stride, int num, int flags) {
@@ -27,7 +27,7 @@ constexpr auto make_buffer_rsrc_lambda = [](void *p, short stride, int num, int
};
constexpr auto global_load_lds_lambda = [](void* src, __shared__ void *dst) {
- __builtin_amdgcn_global_load_lds(src, dst, 16, 0, 0); // gfx90a-error{{invalid size value}} gfx90a-note{{size must be 1, 2, or 4}}
+ __builtin_amdgcn_global_load_lds(src, dst, 16, 0, 0); // gfx90a-error{{invalid size value}}
};
__device__ __amdgpu_buffer_rsrc_t global_test_simple_builtin(void *p, short stride, int num, int flags) {
@@ -35,7 +35,7 @@ __device__ __amdgpu_buffer_rsrc_t global_test_simple_builtin(void *p, short stri
}
__device__ void global_test_target_dependant_builtin(void *src, __shared__ void *dst) {
- global_load_lds_lambda(src, dst);
+ global_load_lds_lambda(src, dst); // gfx90a-note{{called by 'global_test_target_dependant_builtin'}}
}
__device__ __amdgpu_buffer_rsrc_t local_test_simple_builtin(void *p, short stride, int num, int flags) {
@@ -47,7 +47,7 @@ __device__ __amdgpu_buffer_rsrc_t local_test_simple_builtin(void *p, short strid
__device__ void local_test_target_dependant_builtin(void *src, __shared__ void *dst) {
constexpr auto f = [](void* src, __shared__ void *dst) {
- __builtin_amdgcn_global_load_lds(src, dst, 16, 0, 0); // gfx90a-error{{invalid size value}} gfx90a-note{{size must be 1, 2, or 4}}
+ __builtin_amdgcn_global_load_lds(src, dst, 16, 0, 0); // gfx90a-error{{invalid size value}}
};
- f(src, dst);
+ f(src, dst); // gfx90a-note{{called by 'local_test_target_dependant_builtin'}}
}
diff --git a/clang/test/SemaHIP/amdgpu-gfx950-load-to-lds.hip b/clang/test/SemaHIP/amdgpu-gfx950-load-to-lds.hip
new file mode 100644
index 0000000000000..366278f648939
--- /dev/null
+++ b/clang/test/SemaHIP/amdgpu-gfx950-load-to-lds.hip
@@ -0,0 +1,60 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -fsyntax-only -triple amdgcn -target-cpu gfx950 -verify=device %s -fcuda-is-device
+// RUN: %clang_cc1 -fsyntax-only -triple x86_64 -aux-triple amdgcn -verify=host %s
+// device-no-diagnostics
+
+#define __device__ __attribute__((device))
+#define __global__ __attribute__((global))
+#define __shared__ __attribute__((shared))
+
+__device__ void i_am_device(void* src, __amdgpu_buffer_rsrc_t rsrc, __shared__ void* dst, int vindex, int voffset, int soffset) {
+ __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, dst, 1, voffset, soffset, 0, 0);
+ __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, dst, 2, voffset, soffset, 0, 0);
+ __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, dst, 4, voffset, soffset, 0, 0);
+ __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, dst, 12, voffset, soffset, 0, 0);
+ __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, dst, 16, voffset, soffset, 0, 0);
+
+ __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, dst, 1, vindex, voffset, soffset, 0, 0);
+ __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, dst, 2, vindex, voffset, soffset, 0, 0);
+ __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, dst, 4, vindex, voffset, soffset, 0, 0);
+ __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, dst, 12, vindex, voffset, soffset, 0, 0);
+ __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, dst, 16, vindex, voffset, soffset, 0, 0);
+
+ __builtin_amdgcn_load_to_lds(src, dst, 1, 0, 0); // host-error{{cannot initialize a parameter of type '__attribute__((address_space(3))) void *' with an lvalue of type 'void *'}}
+ __builtin_amdgcn_load_to_lds(src, dst, 2, 0, 0); // host-error{{cannot initialize a parameter of type '__attribute__((address_space(3))) void *' with an lvalue of type 'void *'}}
+ __builtin_amdgcn_load_to_lds(src, dst, 4, 0, 0); // host-error{{cannot initialize a parameter of type '__attribute__((address_space(3))) void *' with an lvalue of type 'void *'}}
+ __builtin_amdgcn_load_to_lds(src, dst, 12, 0, 0); // host-error{{cannot initialize a parameter of type '__attribute__((address_space(3))) void *' with an lvalue of type 'void *'}}
+ __builtin_amdgcn_load_to_lds(src, dst, 16, 0, 0); // host-error{{cannot initialize a parameter of type '__attribute__((address_space(3))) void *' with an lvalue of type 'void *'}}
+
+ __builtin_amdgcn_global_load_lds(src, dst, 1, 0 , 0);
+ __builtin_amdgcn_global_load_lds(src, dst, 2, 0 , 0);
+ __builtin_amdgcn_global_load_lds(src, dst, 4, 0 , 0);
+ __builtin_amdgcn_global_load_lds(src, dst, 12, 0 , 0);
+ __builtin_amdgcn_global_load_lds(src, dst, 16, 0 , 0);
+}
+
+__global__ void i_am_kernel(void* src, __amdgpu_buffer_rsrc_t rsrc, __shared__ void* dst, int vindex, int voffset, int soffset) {
+ __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, dst, 1, voffset, soffset, 0, 0);
+ __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, dst, 2, voffset, soffset, 0, 0);
+ __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, dst, 4, voffset, soffset, 0, 0);
+ __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, dst, 12, voffset, soffset, 0, 0);
+ __builtin_amdgcn_raw_ptr_buffer_load_lds(rsrc, dst, 16, voffset, soffset, 0, 0);
+
+ __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, dst, 1, vindex, voffset, soffset, 0, 0);
+ __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, dst, 2, vindex, voffset, soffset, 0, 0);
+ __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, dst, 4, vindex, voffset, soffset, 0, 0);
+ __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, dst, 12, vindex, voffset, soffset, 0, 0);
+ __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, dst, 16, vindex, voffset, soffset, 0, 0);
+
+ __builtin_amdgcn_load_to_lds(src, dst, 1, 0, 0); // host-error{{cannot initialize a parameter of type '__attribute__((address_space(3))) void *' with an lvalue of type 'void *'}}
+ __builtin_amdgcn_load_to_lds(src, dst, 2, 0, 0); // host-error{{cannot initialize a parameter of type '__attribute__((address_space(3))) void *' with an lvalue of type 'void *'}}
+ __builtin_amdgcn_load_to_lds(src, dst, 4, 0, 0); // host-error{{cannot initialize a parameter of type '__attribute__((address_space(3))) void *' with an lvalue of type 'void *'}}
+ __builtin_amdgcn_load_to_lds(src, dst, 12, 0, 0); // host-error{{cannot initialize a parameter of type '__attribute__((address_space(3))) void *' with an lvalue of type 'void *'}}
+ __builtin_amdgcn_load_to_lds(src, dst, 16, 0, 0); // host-error{{cannot initialize a parameter of type '__attribute__((address_space(3))) void *' with an lvalue of type 'void *'}}
+
+ __builtin_amdgcn_global_load_lds(src, dst, 1, 0 , 0);
+ __builtin_amdgcn_global_load_lds(src, dst, 2, 0 , 0);
+ __builtin_amdgcn_global_load_lds(src, dst, 4, 0 , 0);
+ __builtin_amdgcn_global_load_lds(src, dst, 12, 0 , 0);
+ __builtin_amdgcn_global_load_lds(src, dst, 16, 0 , 0);
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/160140
More information about the cfe-commits
mailing list