[clang] [HIP] Perform implicit pointer cast when compiling device code, not when -fcuda-is-device (PR #165387)

Juan Manuel Martinez CaamaƱo via cfe-commits cfe-commits at lists.llvm.org
Tue Oct 28 06:24:01 PDT 2025


https://github.com/jmmartinez updated https://github.com/llvm/llvm-project/pull/165387

>From ed2f606ab6f7d2661189401e724fb16affdc0732 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Juan=20Manuel=20Martinez=20Caama=C3=B1o?=
 <jmartinezcaamao at gmail.com>
Date: Tue, 28 Oct 2025 11:46:03 +0100
Subject: [PATCH] [HIP] Perform implicit pointer cast when compiling device
 code, not when -fcuda-is-device

When compiling HIP device code, we add implicit casts for the pointer
arguments being passed to builtin calls.

When compiling for the host, apply the same casts for __device__ or __kernel__ functions,
since the device side of the source should still pass type checks.

This patch changes the condition depending on -fcuda-is-device to depend
on if the builtin's caller is marked as __device__ or __kernel__.

stack-info: PR: https://github.com/llvm/llvm-project/pull/165387, branch: users/jmmartinez/fix/load_lds_typesignature/1
---
 clang/lib/Sema/SemaExpr.cpp                   |  6 +++--
 .../SemaHIP/amdgpu-gfx950-load-to-lds.hip     | 26 +++++++++----------
 2 files changed, 17 insertions(+), 15 deletions(-)

diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index a50c27610dc96..1d1b0f5c75905 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -6734,8 +6734,10 @@ ExprResult Sema::BuildCallExpr(Scope *Scope, Expr *Fn, SourceLocation LParenLoc,
     // If Arg is declared in the default address space and Param is declared
     // in a non-default address space, perform an implicit address space cast to
     // the parameter type.
-    if (getLangOpts().HIP && getLangOpts().CUDAIsDevice && FD &&
-        FD->getBuiltinID()) {
+    FunctionDecl *Caller = getCurFunctionDecl(/*AllowLambda =*/true);
+    bool CallerIsDevice = Caller && (Caller->hasAttr<CUDAGlobalAttr>() ||
+                                     Caller->hasAttr<CUDADeviceAttr>());
+    if (getLangOpts().HIP && CallerIsDevice && FD && FD->getBuiltinID()) {
       for (unsigned Idx = 0; Idx < ArgExprs.size() && Idx < FD->param_size();
           ++Idx) {
         ParmVarDecl *Param = FD->getParamDecl(Idx);
diff --git a/clang/test/SemaHIP/amdgpu-gfx950-load-to-lds.hip b/clang/test/SemaHIP/amdgpu-gfx950-load-to-lds.hip
index 366278f648939..b49c1866caa1c 100644
--- a/clang/test/SemaHIP/amdgpu-gfx950-load-to-lds.hip
+++ b/clang/test/SemaHIP/amdgpu-gfx950-load-to-lds.hip
@@ -1,7 +1,7 @@
 // REQUIRES: amdgpu-registered-target
-// RUN: %clang_cc1 -fsyntax-only -triple amdgcn -target-cpu gfx950 -verify=device %s -fcuda-is-device
-// RUN: %clang_cc1 -fsyntax-only -triple x86_64 -aux-triple amdgcn -verify=host %s
-// device-no-diagnostics
+// RUN: %clang_cc1 -fsyntax-only -triple amdgcn -target-cpu gfx950 -verify %s -fcuda-is-device
+// RUN: %clang_cc1 -fsyntax-only -triple x86_64 -aux-triple amdgcn -verify %s
+// expected-no-diagnostics
 
 #define __device__ __attribute__((device))
 #define __global__ __attribute__((global))
@@ -20,11 +20,11 @@ __device__ void i_am_device(void* src, __amdgpu_buffer_rsrc_t rsrc, __shared__ v
     __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, dst, 12, vindex, voffset, soffset, 0, 0);
     __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, dst, 16, vindex, voffset, soffset, 0, 0);
 
-    __builtin_amdgcn_load_to_lds(src, dst, 1, 0, 0); // host-error{{cannot initialize a parameter of type '__attribute__((address_space(3))) void *' with an lvalue of type 'void *'}}
-    __builtin_amdgcn_load_to_lds(src, dst, 2, 0, 0); // host-error{{cannot initialize a parameter of type '__attribute__((address_space(3))) void *' with an lvalue of type 'void *'}}
-    __builtin_amdgcn_load_to_lds(src, dst, 4, 0, 0); // host-error{{cannot initialize a parameter of type '__attribute__((address_space(3))) void *' with an lvalue of type 'void *'}}
-    __builtin_amdgcn_load_to_lds(src, dst, 12, 0, 0); // host-error{{cannot initialize a parameter of type '__attribute__((address_space(3))) void *' with an lvalue of type 'void *'}}
-    __builtin_amdgcn_load_to_lds(src, dst, 16, 0, 0); // host-error{{cannot initialize a parameter of type '__attribute__((address_space(3))) void *' with an lvalue of type 'void *'}}
+    __builtin_amdgcn_load_to_lds(src, dst, 1, 0, 0);
+    __builtin_amdgcn_load_to_lds(src, dst, 2, 0, 0);
+    __builtin_amdgcn_load_to_lds(src, dst, 4, 0, 0);
+    __builtin_amdgcn_load_to_lds(src, dst, 12, 0, 0);
+    __builtin_amdgcn_load_to_lds(src, dst, 16, 0, 0);
 
     __builtin_amdgcn_global_load_lds(src, dst, 1, 0 , 0);
     __builtin_amdgcn_global_load_lds(src, dst, 2, 0 , 0);
@@ -46,11 +46,11 @@ __global__ void i_am_kernel(void* src, __amdgpu_buffer_rsrc_t rsrc, __shared__ v
     __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, dst, 12, vindex, voffset, soffset, 0, 0);
     __builtin_amdgcn_struct_ptr_buffer_load_lds(rsrc, dst, 16, vindex, voffset, soffset, 0, 0);
 
-    __builtin_amdgcn_load_to_lds(src, dst, 1, 0, 0); // host-error{{cannot initialize a parameter of type '__attribute__((address_space(3))) void *' with an lvalue of type 'void *'}}
-    __builtin_amdgcn_load_to_lds(src, dst, 2, 0, 0); // host-error{{cannot initialize a parameter of type '__attribute__((address_space(3))) void *' with an lvalue of type 'void *'}}
-    __builtin_amdgcn_load_to_lds(src, dst, 4, 0, 0); // host-error{{cannot initialize a parameter of type '__attribute__((address_space(3))) void *' with an lvalue of type 'void *'}}
-    __builtin_amdgcn_load_to_lds(src, dst, 12, 0, 0); // host-error{{cannot initialize a parameter of type '__attribute__((address_space(3))) void *' with an lvalue of type 'void *'}}
-    __builtin_amdgcn_load_to_lds(src, dst, 16, 0, 0); // host-error{{cannot initialize a parameter of type '__attribute__((address_space(3))) void *' with an lvalue of type 'void *'}}
+    __builtin_amdgcn_load_to_lds(src, dst, 1, 0, 0);
+    __builtin_amdgcn_load_to_lds(src, dst, 2, 0, 0);
+    __builtin_amdgcn_load_to_lds(src, dst, 4, 0, 0);
+    __builtin_amdgcn_load_to_lds(src, dst, 12, 0, 0);
+    __builtin_amdgcn_load_to_lds(src, dst, 16, 0, 0);
 
     __builtin_amdgcn_global_load_lds(src, dst, 1, 0 , 0);
     __builtin_amdgcn_global_load_lds(src, dst, 2, 0 , 0);



More information about the cfe-commits mailing list