[clang] 2857f3b - [clang][SYCL] Strip references from generated kernel argument types (#186788)

via cfe-commits cfe-commits at lists.llvm.org
Tue Mar 24 05:03:09 PDT 2026


Author: Mariya Podchishchaeva
Date: 2026-03-24T13:03:04+01:00
New Revision: 2857f3bb6af5b7befee796c7ad3bb4edf8b6b028

URL: https://github.com/llvm/llvm-project/commit/2857f3bb6af5b7befee796c7ad3bb4edf8b6b028
DIFF: https://github.com/llvm/llvm-project/commit/2857f3bb6af5b7befee796c7ad3bb4edf8b6b028.diff

LOG: [clang][SYCL] Strip references from generated kernel argument types (#186788)

Prior to this patch, kernel generation copied kernel argument types
exactly as written in the signature of the function declared with the
[[clang::sycl_kernel_entry_point]] attribute. This caused generation of
kernels that have reference kernel arguments instead of byval kernel
arguments.
SYCL 2020 doesn't allow reference kernel arguments and it doesn't seem
to work with the backends.
Arguments to [[clang::sycl_kernel_entry_point]]-attributed function can
be big, so we preserve references during host code generation to avoid
performance issues in SYCL Runtime library implementation because the
same function will be used for actual kernel argument setting via
sycl_kernel_launch interface.
Note that we still need to diagnose references in the user's kernel
arguments, since they are explicitly disallowed by the SYCL 2020 spec;
that task is on the TODO list. This patch simply removes references
from types written in the SYCL Runtime library.

Assisted-by: claude in test writing.

Added: 
    

Modified: 
    clang/lib/Sema/SemaSYCL.cpp
    clang/test/ASTSYCL/ast-dump-sycl-kernel-call-stmt.cpp
    clang/test/CodeGenSYCL/kernel-caller-entry-point.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp
index ff8ad61aa3af2..112a6e4416df2 100644
--- a/clang/lib/Sema/SemaSYCL.cpp
+++ b/clang/lib/Sema/SemaSYCL.cpp
@@ -608,10 +608,10 @@ class OutlinedFunctionDeclBodyInstantiator
       ParmDeclMap::iterator I = MapRef.find(PVD);
       if (I != MapRef.end()) {
         VarDecl *VD = I->second;
-        assert(SemaRef.getASTContext().hasSameUnqualifiedType(PVD->getType(),
-                                                              VD->getType()));
-        assert(!VD->getType().isMoreQualifiedThan(PVD->getType(),
-                                                  SemaRef.getASTContext()));
+        assert(SemaRef.getASTContext().hasSameUnqualifiedType(
+            PVD->getType().getNonReferenceType(), VD->getType()));
+        assert(!VD->getType().isMoreQualifiedThan(
+            PVD->getType().getNonReferenceType(), SemaRef.getASTContext()));
         VD->setIsUsed();
         return DeclRefExpr::Create(
             SemaRef.getASTContext(), DRE->getQualifierLoc(),
@@ -650,7 +650,7 @@ OutlinedFunctionDecl *BuildSYCLKernelEntryPointOutline(Sema &SemaRef,
   for (ParmVarDecl *PVD : FD->parameters()) {
     ImplicitParamDecl *IPD = ImplicitParamDecl::Create(
         SemaRef.getASTContext(), OFD, SourceLocation(), PVD->getIdentifier(),
-        PVD->getType(), ImplicitParamKind::Other);
+        PVD->getType().getNonReferenceType(), ImplicitParamKind::Other);
     OFD->setParam(i, IPD);
     ParmMap[PVD] = IPD;
     ++i;

diff  --git a/clang/test/ASTSYCL/ast-dump-sycl-kernel-call-stmt.cpp b/clang/test/ASTSYCL/ast-dump-sycl-kernel-call-stmt.cpp
index 0c9ee05ac7614..9563892398da9 100644
--- a/clang/test/ASTSYCL/ast-dump-sycl-kernel-call-stmt.cpp
+++ b/clang/test/ASTSYCL/ast-dump-sycl-kernel-call-stmt.cpp
@@ -308,12 +308,12 @@ void skep6(const S6 &k) {
 // CHECK-NEXT: | | |   `-CXXConstructExpr {{.*}} 'S6' 'void (const S6 &) noexcept'
 // CHECK-NEXT: | | |     `-DeclRefExpr {{.*}} 'const S6' lvalue ParmVar {{.*}} 'k' 'const S6 &'
 // CHECK-NEXT: | | `-OutlinedFunctionDecl {{.*}}
-// CHECK-NEXT: | |   |-ImplicitParamDecl {{.*}} implicit used k 'const S6 &'
+// CHECK-NEXT: | |   |-ImplicitParamDecl {{.*}} implicit used k 'const S6'
 // CHECK-NEXT: | |   `-CompoundStmt {{.*}}
 // CHECK-NEXT: | |     `-CXXOperatorCallExpr {{.*}} 'void' '()'
 // CHECK-NEXT: | |       |-ImplicitCastExpr {{.*}} 'void (*)() const' <FunctionToPointerDecay>
 // CHECK-NEXT: | |       | `-DeclRefExpr {{.*}} 'void () const' lvalue CXXMethod {{.*}} 'operator()' 'void () const'
-// CHECK-NEXT: | |       `-DeclRefExpr {{.*}} 'const S6' lvalue ImplicitParam {{.*}} 'k' 'const S6 &'
+// CHECK-NEXT: | |       `-DeclRefExpr {{.*}} 'const S6' lvalue ImplicitParam {{.*}} 'k' 'const S6'
 // CHECK-NEXT: | `-SYCLKernelEntryPointAttr {{.*}} KN<6>
 
 // Parameter types are not required to be complete at the point of a

diff  --git a/clang/test/CodeGenSYCL/kernel-caller-entry-point.cpp b/clang/test/CodeGenSYCL/kernel-caller-entry-point.cpp
index 47c2c45ae7749..7af4c83d1ba32 100644
--- a/clang/test/CodeGenSYCL/kernel-caller-entry-point.cpp
+++ b/clang/test/CodeGenSYCL/kernel-caller-entry-point.cpp
@@ -68,6 +68,36 @@ struct copyable {
   ~copyable();
 };
 
+namespace std {
+template<typename T> constexpr T &&move(T &val) { return static_cast<T&&>(val); }
+template<class T>
+struct type_identity { using type = T; };
+}
+struct ref_arg_kernel_name;
+
+template <typename KernelName, typename KernelType>
+[[clang::sycl_kernel_entry_point(KernelName)]]
+void ref_arg_kernel(const KernelType &ref) {
+  ref(42);
+}
+
+struct fwd_ref_arg_kernel_name;
+struct fwd_ref_arg_kernel_name_move;
+
+template <typename KernelName, typename KernelType>
+[[clang::sycl_kernel_entry_point(KernelName)]]
+void fwd_ref_arg_kernel(KernelType &&ref) {
+  ref(42);
+}
+
+struct rvalue_ref_arg_kernel_name;
+
+template <typename KernelName, typename KernelType>
+[[clang::sycl_kernel_entry_point(KernelName)]]
+void rvalue_ref_arg_kernel(typename std::type_identity<KernelType>::type &&ref) {
+  ref(42);
+}
+
 int main() {
   single_purpose_kernel obj;
   single_purpose_kernel_task(obj);
@@ -78,6 +108,11 @@ int main() {
   handler h;
   copyable c{42};
   h.kernel_entry_point<struct KN>([=] (int a, int b) { return c.i + a + b; }, 1, 2);
+  ref_arg_kernel<ref_arg_kernel_name>(lambda);
+  fwd_ref_arg_kernel<fwd_ref_arg_kernel_name>(lambda);
+  fwd_ref_arg_kernel<fwd_ref_arg_kernel_name_move>(std::move(lambda));
+  auto anotherlambda = [=](auto) { (void) capture; };
+  rvalue_ref_arg_kernel<rvalue_ref_arg_kernel_name, decltype(anotherlambda)>(std::move(anotherlambda));
 }
 
 // Verify that SYCL kernel caller functions are not emitted during host
@@ -87,6 +122,10 @@ int main() {
 // CHECK-HOST-NOT: define {{.*}} @_ZTSZ4mainEUlT_E_
 // CHECK-HOST-NOT: define {{.*}} @"_ZTS6\CE\B4\CF\84\CF\87"
 // CHECK-HOST-NOT: define {{.*}} @_ZTSZ4mainE2KN
+// CHECK-HOST-NOT: define {{.*}} @_ZTS19ref_arg_kernel_name
+// CHECK-HOST-NOT: define {{.*}} @_ZTS23fwd_ref_arg_kernel_name
+// CHECK-HOST-NOT: define {{.*}} @_ZTS28fwd_ref_arg_kernel_name_move
+// CHECK-HOST-NOT: define {{.*}} @_ZTS26rvalue_ref_arg_kernel_name
 
 // Verify that sycl_kernel_entry_point attributed functions are not emitted
 // during device compilation.
@@ -94,6 +133,9 @@ int main() {
 // CHECK-DEVICE-NOT: single_purpose_kernel_task
 // CHECK-DEVICE-NOT: kernel_single_task
 // CHECK-DEVICE-NOT: kernel_entry_point
+// CHECK-DEVICE-NOT: ref_arg_kernel
+// CHECK-DEVICE-NOT: fwd_ref_arg_kernel
+// CHECK-DEVICE-NOT: rvalue_ref_arg_kernel
 
 // Verify that kernel launch code is generated for sycl_kernel_entry_point
 // attributed functions during host compilation.
@@ -101,6 +143,19 @@ int main() {
 // CHECK-HOST-LINUX:      @.str = private unnamed_addr constant [33 x i8] c"_ZTS26single_purpose_kernel_name\00", align 1
 // CHECK-HOST-LINUX:      @.str.1 = private unnamed_addr constant [18 x i8] c"_ZTSZ4mainEUlT_E_\00", align 1
 // CHECK-HOST-LINUX:      @.str.2 = private unnamed_addr constant [12 x i8] c"_ZTS6\CE\B4\CF\84\CF\87\00", align 1
+// CHECK-HOST-LINUX:      @.str.3 = private unnamed_addr constant [15 x i8] c"_ZTSZ4mainE2KN\00", align 1
+// CHECK-HOST-LINUX:      @.str.4 = private unnamed_addr constant [26 x i8] c"_ZTS19ref_arg_kernel_name\00", align 1
+// CHECK-HOST-LINUX:      @.str.5 = private unnamed_addr constant [30 x i8] c"_ZTS23fwd_ref_arg_kernel_name\00", align 1
+// CHECK-HOST-LINUX:      @.str.6 = private unnamed_addr constant [35 x i8] c"_ZTS28fwd_ref_arg_kernel_name_move\00", align 1
+// CHECK-HOST-LINUX:      @.str.7 = private unnamed_addr constant [33 x i8] c"_ZTS26rvalue_ref_arg_kernel_name\00", align 1
+// CHECK-HOST-WINDOWS: @"??_C@_0CB@KFIJOMLB@_ZTS26single_purpose_kernel_name@" = linkonce_odr dso_local unnamed_addr constant [33 x i8] c"_ZTS26single_purpose_kernel_name\00", comdat, align 1
+// CHECK-HOST-WINDOWS: @"??_C@_0BC@NHCDOLAA@_ZTSZ4mainEUlT_E_?$AA@" = linkonce_odr dso_local unnamed_addr constant [18 x i8] c"_ZTSZ4mainEUlT_E_\00", comdat, align 1
+// CHECK-HOST-WINDOWS: @"??_C@_0M@BCGAEMBE@_ZTS6?N?$LE?O?$IE?O?$IH?$AA@" = linkonce_odr dso_local unnamed_addr constant [12 x i8] c"_ZTS6\CE\B4\CF\84\CF\87\00", comdat, align 1
+// CHECK-HOST-WINDOWS: @"??_C@_0P@DLGHPODL@_ZTSZ4mainE2KN?$AA@" = linkonce_odr dso_local unnamed_addr constant [15 x i8] c"_ZTSZ4mainE2KN\00", comdat, align 1
+// CHECK-HOST-WINDOWS: @"??_C@_0BK@PPDJPOBM@_ZTS19ref_arg_kernel_name?$AA@" = linkonce_odr dso_local unnamed_addr constant [26 x i8] c"_ZTS19ref_arg_kernel_name\00", comdat, align 1
+// CHECK-HOST-WINDOWS: @"??_C@_0BO@KEIBIHKH@_ZTS23fwd_ref_arg_kernel_name?$AA@" = linkonce_odr dso_local unnamed_addr constant [30 x i8] c"_ZTS23fwd_ref_arg_kernel_name\00", comdat, align 1
+// CHECK-HOST-WINDOWS: @"??_C@_0CD@FDALJLMM@_ZTS28fwd_ref_arg_kernel_name_mo@" = linkonce_odr dso_local unnamed_addr constant [35 x i8] c"_ZTS28fwd_ref_arg_kernel_name_move\00", comdat, align 1
+// CHECK-HOST-WINDOWS: @"??_C@_0CB@HCPMABHM@_ZTS26rvalue_ref_arg_kernel_name@" = linkonce_odr dso_local unnamed_addr constant [33 x i8] c"_ZTS26rvalue_ref_arg_kernel_name\00", comdat, align 1
 //
 // CHECK-HOST-LINUX:      define dso_local void @_Z26single_purpose_kernel_task21single_purpose_kernel() #{{[0-9]+}} {
 // CHECK-HOST-LINUX-NEXT: entry:
@@ -151,6 +206,58 @@ int main() {
 // CHECK-HOST-LINUX-NEXT:   call void @_ZZ4mainENUliiE_D1Ev(ptr noundef nonnull align 4 dead_on_return(4) dereferenceable(4) %agg.tmp) #{{[0-9]+}}
 // CHECK-HOST-LINUX-NEXT:   ret void
 // CHECK-HOST-LINUX-NEXT: }
+//
+// CHECK-HOST-LINUX:      define internal void @_Z14ref_arg_kernelI19ref_arg_kernel_nameZ4mainEUlT_E_EvRKT0_(ptr noundef nonnull align 4 dereferenceable(4) %ref) #{{[0-9]+}} {
+// CHECK-HOST-LINUX-NEXT: entry:
+// CHECK-HOST-LINUX-NEXT:   %ref.addr = alloca ptr, align 8
+// CHECK-HOST-LINUX-NEXT:   %agg.tmp = alloca %class.anon, align 4
+// CHECK-HOST-LINUX-NEXT:   store ptr %ref, ptr %ref.addr, align 8
+// CHECK-HOST-LINUX-NEXT:   %0 = load ptr, ptr %ref.addr, align 8
+// CHECK-HOST-LINUX-NEXT:   call void @llvm.memcpy.p0.p0.i64(ptr align 4 %agg.tmp, ptr align 4 %0, i64 4, i1 false)
+// CHECK-HOST-LINUX-NEXT:   %coerce.dive = getelementptr inbounds nuw %class.anon, ptr %agg.tmp, i32 0, i32 0
+// CHECK-HOST-LINUX-NEXT:   %1 = load i32, ptr %coerce.dive, align 4
+// CHECK-HOST-LINUX-NEXT:   call void @_Z18sycl_kernel_launchI19ref_arg_kernel_nameJZ4mainEUlT_E_EEvPKcDpT0_(ptr noundef @.str.4, i32 %1)
+// CHECK-HOST-LINUX-NEXT:   ret void
+// CHECK-HOST-LINUX-NEXT: }
+//
+// CHECK-HOST-LINUX:      define internal void @_Z18fwd_ref_arg_kernelI23fwd_ref_arg_kernel_nameRZ4mainEUlT_E_EvOT0_(ptr noundef nonnull align 4 dereferenceable(4) %ref) #{{[0-9]+}} {
+// CHECK-HOST-LINUX-NEXT: entry:
+// CHECK-HOST-LINUX-NEXT:   %ref.addr = alloca ptr, align 8
+// CHECK-HOST-LINUX-NEXT:   %agg.tmp = alloca %class.anon, align 4
+// CHECK-HOST-LINUX-NEXT:   store ptr %ref, ptr %ref.addr, align 8
+// CHECK-HOST-LINUX-NEXT:   %0 = load ptr, ptr %ref.addr, align 8
+// CHECK-HOST-LINUX-NEXT:   call void @llvm.memcpy.p0.p0.i64(ptr align 4 %agg.tmp, ptr align 4 %0, i64 4, i1 false)
+// CHECK-HOST-LINUX-NEXT:   %coerce.dive = getelementptr inbounds nuw %class.anon, ptr %agg.tmp, i32 0, i32 0
+// CHECK-HOST-LINUX-NEXT:   %1 = load i32, ptr %coerce.dive, align 4
+// CHECK-HOST-LINUX-NEXT:   call void @_Z18sycl_kernel_launchI23fwd_ref_arg_kernel_nameJZ4mainEUlT_E_EEvPKcDpT0_(ptr noundef @.str.5, i32 %1)
+// CHECK-HOST-LINUX-NEXT:   ret void
+// CHECK-HOST-LINUX-NEXT: }
+//
+// CHECK-HOST-LINUX:      define internal void @_Z18fwd_ref_arg_kernelI28fwd_ref_arg_kernel_name_moveZ4mainEUlT_E_EvOT0_(ptr noundef nonnull align 4 dereferenceable(4) %ref) #{{[0-9]+}} {
+// CHECK-HOST-LINUX-NEXT: entry:
+// CHECK-HOST-LINUX-NEXT:   %ref.addr = alloca ptr, align 8
+// CHECK-HOST-LINUX-NEXT:   %agg.tmp = alloca %class.anon, align 4
+// CHECK-HOST-LINUX-NEXT:   store ptr %ref, ptr %ref.addr, align 8
+// CHECK-HOST-LINUX-NEXT:   %0 = load ptr, ptr %ref.addr, align 8
+// CHECK-HOST-LINUX-NEXT:   call void @llvm.memcpy.p0.p0.i64(ptr align 4 %agg.tmp, ptr align 4 %0, i64 4, i1 false)
+// CHECK-HOST-LINUX-NEXT:   %coerce.dive = getelementptr inbounds nuw %class.anon, ptr %agg.tmp, i32 0, i32 0
+// CHECK-HOST-LINUX-NEXT:   %1 = load i32, ptr %coerce.dive, align 4
+// CHECK-HOST-LINUX-NEXT:   call void @_Z18sycl_kernel_launchI28fwd_ref_arg_kernel_name_moveJZ4mainEUlT_E_EEvPKcDpT0_(ptr noundef @.str.6, i32 %1)
+// CHECK-HOST-LINUX-NEXT:   ret void
+// CHECK-HOST-LINUX-NEXT: }
+//
+// CHECK-HOST-LINUX:      define internal void @_Z21rvalue_ref_arg_kernelI26rvalue_ref_arg_kernel_nameZ4mainEUlT_E0_EvONSt13type_identityIT0_E4typeE(ptr noundef nonnull align 4 dereferenceable(4) %ref) #{{[0-9]+}} {
+// CHECK-HOST-LINUX-NEXT: entry:
+// CHECK-HOST-LINUX-NEXT:   %ref.addr = alloca ptr, align 8
+// CHECK-HOST-LINUX-NEXT:   %agg.tmp = alloca %class.anon.2, align 4
+// CHECK-HOST-LINUX-NEXT:   store ptr %ref, ptr %ref.addr, align 8
+// CHECK-HOST-LINUX-NEXT:   %0 = load ptr, ptr %ref.addr, align 8
+// CHECK-HOST-LINUX-NEXT:   call void @llvm.memcpy.p0.p0.i64(ptr align 4 %agg.tmp, ptr align 4 %0, i64 4, i1 false)
+// CHECK-HOST-LINUX-NEXT:   %coerce.dive = getelementptr inbounds nuw %class.anon.2, ptr %agg.tmp, i32 0, i32 0
+// CHECK-HOST-LINUX-NEXT:   %1 = load i32, ptr %coerce.dive, align 4
+// CHECK-HOST-LINUX-NEXT:   call void @_Z18sycl_kernel_launchI26rvalue_ref_arg_kernel_nameJZ4mainEUlT_E0_EEvPKcDpT0_(ptr noundef @.str.7, i32 %1)
+// CHECK-HOST-LINUX-NEXT:   ret void
+// CHECK-HOST-LINUX-NEXT: }
 
 // CHECK-HOST-WINDOWS:      define dso_local void @"?single_purpose_kernel_task@@YAXUsingle_purpose_kernel@@@Z"(i8 %kernelFunc.coerce) #{{[0-9]+}} {
 // CHECK-HOST-WINDOWS-NEXT: entry:
@@ -214,6 +321,58 @@ int main() {
 // CHECK-HOST-WINDOWS-NEXT:   call void @"??1<lambda_3>@?0??main@@9@QEAA@XZ"(ptr noundef nonnull align 4 dead_on_return(4) dereferenceable(4) %k) #{{[0-9]+}}
 // CHECK-HOST-WINDOWS-NEXT:   ret void
 // CHECK-HOST-WINDOWS-NEXT: }
+//
+// CHECK-HOST-WINDOWS:      define internal void @"??$ref_arg_kernel@Uref_arg_kernel_name@@V<lambda_1>@?0??main@@9@@@YAXAEBV<lambda_1>@?0??main@@9@@Z"(ptr noundef nonnull align 4 dereferenceable(4) %ref) #{{[0-9]+}} {
+// CHECK-HOST-WINDOWS-NEXT: entry:
+// CHECK-HOST-WINDOWS-NEXT:   %ref.addr = alloca ptr, align 8
+// CHECK-HOST-WINDOWS-NEXT:   %agg.tmp = alloca %class.anon, align 4
+// CHECK-HOST-WINDOWS-NEXT:   store ptr %ref, ptr %ref.addr, align 8
+// CHECK-HOST-WINDOWS-NEXT:   %0 = load ptr, ptr %ref.addr, align 8
+// CHECK-HOST-WINDOWS-NEXT:   call void @llvm.memcpy.p0.p0.i64(ptr align 4 %agg.tmp, ptr align 4 %0, i64 4, i1 false)
+// CHECK-HOST-WINDOWS-NEXT:   %coerce.dive = getelementptr inbounds nuw %class.anon, ptr %agg.tmp, i32 0, i32 0
+// CHECK-HOST-WINDOWS-NEXT:   %1 = load i32, ptr %coerce.dive, align 4
+// CHECK-HOST-WINDOWS-NEXT:   call void @"??$sycl_kernel_launch@Uref_arg_kernel_name@@V<lambda_1>@?0??main@@9@@@YAXPEBDV<lambda_1>@?0??main@@9@@Z"(ptr noundef @"??_C@_0BK@PPDJPOBM@_ZTS19ref_arg_kernel_name?$AA@", i32 %1)
+// CHECK-HOST-WINDOWS-NEXT:   ret void
+// CHECK-HOST-WINDOWS-NEXT: }
+//
+// CHECK-HOST-WINDOWS:      define internal void @"??$fwd_ref_arg_kernel@Ufwd_ref_arg_kernel_name@@AEAV<lambda_1>@?0??main@@9@@@YAXAEAV<lambda_1>@?0??main@@9@@Z"(ptr noundef nonnull align 4 dereferenceable(4) %ref) #{{[0-9]+}} {
+// CHECK-HOST-WINDOWS-NEXT: entry:
+// CHECK-HOST-WINDOWS-NEXT:   %ref.addr = alloca ptr, align 8
+// CHECK-HOST-WINDOWS-NEXT:   %agg.tmp = alloca %class.anon, align 4
+// CHECK-HOST-WINDOWS-NEXT:   store ptr %ref, ptr %ref.addr, align 8
+// CHECK-HOST-WINDOWS-NEXT:   %0 = load ptr, ptr %ref.addr, align 8
+// CHECK-HOST-WINDOWS-NEXT:   call void @llvm.memcpy.p0.p0.i64(ptr align 4 %agg.tmp, ptr align 4 %0, i64 4, i1 false)
+// CHECK-HOST-WINDOWS-NEXT:   %coerce.dive = getelementptr inbounds nuw %class.anon, ptr %agg.tmp, i32 0, i32 0
+// CHECK-HOST-WINDOWS-NEXT:   %1 = load i32, ptr %coerce.dive, align 4
+// CHECK-HOST-WINDOWS-NEXT:   call void @"??$sycl_kernel_launch@Ufwd_ref_arg_kernel_name@@V<lambda_1>@?0??main@@9@@@YAXPEBDV<lambda_1>@?0??main@@9@@Z"(ptr noundef @"??_C@_0BO@KEIBIHKH@_ZTS23fwd_ref_arg_kernel_name?$AA@", i32 %1)
+// CHECK-HOST-WINDOWS-NEXT:   ret void
+// CHECK-HOST-WINDOWS-NEXT: }
+//
+// CHECK-HOST-WINDOWS:      define internal void @"??$fwd_ref_arg_kernel@Ufwd_ref_arg_kernel_name_move@@V<lambda_1>@?0??main@@9@@@YAX$$QEAV<lambda_1>@?0??main@@9@@Z"(ptr noundef nonnull align 4 dereferenceable(4) %ref) #{{[0-9]+}} {
+// CHECK-HOST-WINDOWS-NEXT: entry:
+// CHECK-HOST-WINDOWS-NEXT:   %ref.addr = alloca ptr, align 8
+// CHECK-HOST-WINDOWS-NEXT:   %agg.tmp = alloca %class.anon, align 4
+// CHECK-HOST-WINDOWS-NEXT:   store ptr %ref, ptr %ref.addr, align 8
+// CHECK-HOST-WINDOWS-NEXT:   %0 = load ptr, ptr %ref.addr, align 8
+// CHECK-HOST-WINDOWS-NEXT:   call void @llvm.memcpy.p0.p0.i64(ptr align 4 %agg.tmp, ptr align 4 %0, i64 4, i1 false)
+// CHECK-HOST-WINDOWS-NEXT:   %coerce.dive = getelementptr inbounds nuw %class.anon, ptr %agg.tmp, i32 0, i32 0
+// CHECK-HOST-WINDOWS-NEXT:   %1 = load i32, ptr %coerce.dive, align 4
+// CHECK-HOST-WINDOWS-NEXT:   call void @"??$sycl_kernel_launch@Ufwd_ref_arg_kernel_name_move@@V<lambda_1>@?0??main@@9@@@YAXPEBDV<lambda_1>@?0??main@@9@@Z"(ptr noundef @"??_C@_0CD@FDALJLMM@_ZTS28fwd_ref_arg_kernel_name_mo@", i32 %1)
+// CHECK-HOST-WINDOWS-NEXT:   ret void
+// CHECK-HOST-WINDOWS-NEXT: }
+//
+// CHECK-HOST-WINDOWS:      define internal void @"??$rvalue_ref_arg_kernel@Urvalue_ref_arg_kernel_name@@V<lambda_4>@?0??main@@9@@@YAX$$QEAV<lambda_4>@?0??main@@9@@Z"(ptr noundef nonnull align 4 dereferenceable(4) %ref) #{{[0-9]+}} {
+// CHECK-HOST-WINDOWS-NEXT: entry:
+// CHECK-HOST-WINDOWS-NEXT:   %ref.addr = alloca ptr, align 8
+// CHECK-HOST-WINDOWS-NEXT:   %agg.tmp = alloca %class.anon.2, align 4
+// CHECK-HOST-WINDOWS-NEXT:   store ptr %ref, ptr %ref.addr, align 8
+// CHECK-HOST-WINDOWS-NEXT:   %0 = load ptr, ptr %ref.addr, align 8
+// CHECK-HOST-WINDOWS-NEXT:   call void @llvm.memcpy.p0.p0.i64(ptr align 4 %agg.tmp, ptr align 4 %0, i64 4, i1 false)
+// CHECK-HOST-WINDOWS-NEXT:   %coerce.dive = getelementptr inbounds nuw %class.anon.2, ptr %agg.tmp, i32 0, i32 0
+// CHECK-HOST-WINDOWS-NEXT:   %1 = load i32, ptr %coerce.dive, align 4
+// CHECK-HOST-WINDOWS-NEXT:   call void @"??$sycl_kernel_launch@Urvalue_ref_arg_kernel_name@@V<lambda_4>@?0??main@@9@@@YAXPEBDV<lambda_4>@?0??main@@9@@Z"(ptr noundef @"??_C@_0CB@HCPMABHM@_ZTS26rvalue_ref_arg_kernel_name@", i32 %1)
+// CHECK-HOST-WINDOWS-NEXT:   ret void
+// CHECK-HOST-WINDOWS-NEXT: }
 
 // Verify that SYCL kernel caller functions are emitted for each device target.
 //
@@ -412,6 +571,138 @@ int main() {
 // CHECK-SPIRV-NEXT:    ret void
 // CHECK-SPIRV-NEXT:  }
 
+// IR for the SYCL kernel caller function generated for ref_arg_kernel with
+// ref_arg_kernel_name as the SYCL kernel name type. The reference parameter is
+// lowered to a value parameter in the kernel entry point.
+//
+// CHECK-AMDGCN:      define dso_local amdgpu_kernel void @_ZTS19ref_arg_kernel_name
+// CHECK-AMDGCN-SAME:   (i32 %ref.coerce) #[[AMDGCN_ATTR0]] {
+// CHECK-AMDGCN-NEXT: entry:
+// CHECK-AMDGCN-NEXT:   %ref = alloca %class.anon, align 4, addrspace(5)
+// CHECK-AMDGCN-NEXT:   %ref1 = addrspacecast ptr addrspace(5) %ref to ptr
+// CHECK-AMDGCN-NEXT:   %coerce.dive = getelementptr inbounds nuw %class.anon, ptr %ref1, i32 0, i32 0
+// CHECK-AMDGCN-NEXT:   store i32 %ref.coerce, ptr %coerce.dive, align 4
+// CHECK-AMDGCN-NEXT:   call void @_ZZ4mainENKUlT_E_clIiEEDaS_
+// CHECK-AMDGCN-SAME:     (ptr noundef nonnull align 4 dereferenceable(4) %ref1, i32 noundef 42) #[[AMDGCN_ATTR1]]
+// CHECK-AMDGCN-NEXT:   ret void
+// CHECK-AMDGCN-NEXT: }
+//
+// CHECK-NVPTX:       define dso_local ptx_kernel void @_ZTS19ref_arg_kernel_name
+// CHECK-NVPTX-SAME:    (ptr noundef byval(%class.anon) align 4 %ref) #[[NVPTX_ATTR0]] {
+// CHECK-NVPTX-NEXT:  entry:
+// CHECK-NVPTX-NEXT:    call void @_ZZ4mainENKUlT_E_clIiEEDaS_
+// CHECK-NVPTX-SAME:      (ptr noundef nonnull align 4 dereferenceable(4) %ref, i32 noundef 42) #[[NVPTX_ATTR1]]
+// CHECK-NVPTX-NEXT:    ret void
+// CHECK-NVPTX-NEXT:  }
+//
+// CHECK-SPIR:        define {{[a-z_ ]*}}spir_kernel void @_ZTS19ref_arg_kernel_name
+// CHECK-SPIR-SAME:     (ptr noundef byval(%class.anon) align 4 %ref) #[[SPIR_ATTR0]] {
+// CHECK-SPIR-NEXT:   entry:
+// CHECK-SPIR-NEXT:     %ref.ascast = addrspacecast ptr %ref to ptr addrspace(4)
+// CHECK-SPIR-NEXT:     call spir_func void @_ZZ4mainENKUlT_E_clIiEEDaS_
+// CHECK-SPIR-SAME:       (ptr addrspace(4) noundef align 4 dereferenceable_or_null(4) %ref.ascast, i32 noundef 42) #[[SPIR_ATTR1]]
+// CHECK-SPIR-NEXT:     ret void
+// CHECK-SPIR-NEXT:   }
+
+// IR for the SYCL kernel caller function generated for fwd_ref_arg_kernel
+// with fwd_ref_arg_kernel_name as the SYCL kernel name type. The forwarding
+// reference parameter is lowered to a value parameter in the kernel entry point.
+//
+// CHECK-AMDGCN:      define dso_local amdgpu_kernel void @_ZTS23fwd_ref_arg_kernel_name
+// CHECK-AMDGCN-SAME:   (i32 %ref.coerce) #[[AMDGCN_ATTR0]] {
+// CHECK-AMDGCN-NEXT: entry:
+// CHECK-AMDGCN-NEXT:   %ref = alloca %class.anon, align 4, addrspace(5)
+// CHECK-AMDGCN-NEXT:   %ref1 = addrspacecast ptr addrspace(5) %ref to ptr
+// CHECK-AMDGCN-NEXT:   %coerce.dive = getelementptr inbounds nuw %class.anon, ptr %ref1, i32 0, i32 0
+// CHECK-AMDGCN-NEXT:   store i32 %ref.coerce, ptr %coerce.dive, align 4
+// CHECK-AMDGCN-NEXT:   call void @_ZZ4mainENKUlT_E_clIiEEDaS_
+// CHECK-AMDGCN-SAME:     (ptr noundef nonnull align 4 dereferenceable(4) %ref1, i32 noundef 42) #[[AMDGCN_ATTR1]]
+// CHECK-AMDGCN-NEXT:   ret void
+// CHECK-AMDGCN-NEXT: }
+//
+// CHECK-NVPTX:       define dso_local ptx_kernel void @_ZTS23fwd_ref_arg_kernel_name
+// CHECK-NVPTX-SAME:    (ptr noundef byval(%class.anon) align 4 %ref) #[[NVPTX_ATTR0]] {
+// CHECK-NVPTX-NEXT:  entry:
+// CHECK-NVPTX-NEXT:    call void @_ZZ4mainENKUlT_E_clIiEEDaS_
+// CHECK-NVPTX-SAME:      (ptr noundef nonnull align 4 dereferenceable(4) %ref, i32 noundef 42) #[[NVPTX_ATTR1]]
+// CHECK-NVPTX-NEXT:    ret void
+// CHECK-NVPTX-NEXT:  }
+//
+// CHECK-SPIR:        define {{[a-z_ ]*}}spir_kernel void @_ZTS23fwd_ref_arg_kernel_name
+// CHECK-SPIR-SAME:     (ptr noundef byval(%class.anon) align 4 %ref) #[[SPIR_ATTR0]] {
+// CHECK-SPIR-NEXT:   entry:
+// CHECK-SPIR-NEXT:     %ref.ascast = addrspacecast ptr %ref to ptr addrspace(4)
+// CHECK-SPIR-NEXT:     call spir_func void @_ZZ4mainENKUlT_E_clIiEEDaS_
+// CHECK-SPIR-SAME:       (ptr addrspace(4) noundef align 4 dereferenceable_or_null(4) %ref.ascast, i32 noundef 42) #[[SPIR_ATTR1]]
+// CHECK-SPIR-NEXT:     ret void
+// CHECK-SPIR-NEXT:   }
+
+// IR for the SYCL kernel caller function generated for fwd_ref_arg_kernel
+// with fwd_ref_arg_kernel_name_move as the SYCL kernel name type. The rvalue
+// reference parameter is lowered to a value parameter in the kernel entry point.
+//
+// CHECK-AMDGCN:      define dso_local amdgpu_kernel void @_ZTS28fwd_ref_arg_kernel_name_move
+// CHECK-AMDGCN-SAME:   (i32 %ref.coerce) #[[AMDGCN_ATTR0]] {
+// CHECK-AMDGCN-NEXT: entry:
+// CHECK-AMDGCN-NEXT:   %ref = alloca %class.anon, align 4, addrspace(5)
+// CHECK-AMDGCN-NEXT:   %ref1 = addrspacecast ptr addrspace(5) %ref to ptr
+// CHECK-AMDGCN-NEXT:   %coerce.dive = getelementptr inbounds nuw %class.anon, ptr %ref1, i32 0, i32 0
+// CHECK-AMDGCN-NEXT:   store i32 %ref.coerce, ptr %coerce.dive, align 4
+// CHECK-AMDGCN-NEXT:   call void @_ZZ4mainENKUlT_E_clIiEEDaS_
+// CHECK-AMDGCN-SAME:     (ptr noundef nonnull align 4 dereferenceable(4) %ref1, i32 noundef 42) #[[AMDGCN_ATTR1]]
+// CHECK-AMDGCN-NEXT:   ret void
+// CHECK-AMDGCN-NEXT: }
+//
+// CHECK-NVPTX:       define dso_local ptx_kernel void @_ZTS28fwd_ref_arg_kernel_name_move
+// CHECK-NVPTX-SAME:    (ptr noundef byval(%class.anon) align 4 %ref) #[[NVPTX_ATTR0]] {
+// CHECK-NVPTX-NEXT:  entry:
+// CHECK-NVPTX-NEXT:    call void @_ZZ4mainENKUlT_E_clIiEEDaS_
+// CHECK-NVPTX-SAME:      (ptr noundef nonnull align 4 dereferenceable(4) %ref, i32 noundef 42) #[[NVPTX_ATTR1]]
+// CHECK-NVPTX-NEXT:    ret void
+// CHECK-NVPTX-NEXT:  }
+//
+// CHECK-SPIR:        define {{[a-z_ ]*}}spir_kernel void @_ZTS28fwd_ref_arg_kernel_name_move
+// CHECK-SPIR-SAME:     (ptr noundef byval(%class.anon) align 4 %ref) #[[SPIR_ATTR0]] {
+// CHECK-SPIR-NEXT:   entry:
+// CHECK-SPIR-NEXT:     %ref.ascast = addrspacecast ptr %ref to ptr addrspace(4)
+// CHECK-SPIR-NEXT:     call spir_func void @_ZZ4mainENKUlT_E_clIiEEDaS_
+// CHECK-SPIR-SAME:       (ptr addrspace(4) noundef align 4 dereferenceable_or_null(4) %ref.ascast, i32 noundef 42) #[[SPIR_ATTR1]]
+// CHECK-SPIR-NEXT:     ret void
+// CHECK-SPIR-NEXT:   }
+
+// IR for the SYCL kernel caller function generated for rvalue_ref_arg_kernel
+// with rvalue_ref_arg_kernel_name as the SYCL kernel name type. The rvalue
+// reference parameter is lowered to a value parameter in the kernel entry point.
+//
+// CHECK-AMDGCN:      define dso_local amdgpu_kernel void @_ZTS26rvalue_ref_arg_kernel_name
+// CHECK-AMDGCN-SAME:   (i32 %ref.coerce) #[[AMDGCN_ATTR0]] {
+// CHECK-AMDGCN-NEXT: entry:
+// CHECK-AMDGCN-NEXT:   %ref = alloca %class.anon.2, align 4, addrspace(5)
+// CHECK-AMDGCN-NEXT:   %ref1 = addrspacecast ptr addrspace(5) %ref to ptr
+// CHECK-AMDGCN-NEXT:   %coerce.dive = getelementptr inbounds nuw %class.anon.2, ptr %ref1, i32 0, i32 0
+// CHECK-AMDGCN-NEXT:   store i32 %ref.coerce, ptr %coerce.dive, align 4
+// CHECK-AMDGCN-NEXT:   call void @_ZZ4mainENKUlT_E0_clIiEEDaS_
+// CHECK-AMDGCN-SAME:     (ptr noundef nonnull align 4 dereferenceable(4) %ref1, i32 noundef 42) #[[AMDGCN_ATTR1]]
+// CHECK-AMDGCN-NEXT:   ret void
+// CHECK-AMDGCN-NEXT: }
+//
+// CHECK-NVPTX:       define dso_local ptx_kernel void @_ZTS26rvalue_ref_arg_kernel_name
+// CHECK-NVPTX-SAME:    (ptr noundef byval(%class.anon.2) align 4 %ref) #[[NVPTX_ATTR0]] {
+// CHECK-NVPTX-NEXT:  entry:
+// CHECK-NVPTX-NEXT:    call void @_ZZ4mainENKUlT_E0_clIiEEDaS_
+// CHECK-NVPTX-SAME:      (ptr noundef nonnull align 4 dereferenceable(4) %ref, i32 noundef 42) #[[NVPTX_ATTR1]]
+// CHECK-NVPTX-NEXT:    ret void
+// CHECK-NVPTX-NEXT:  }
+//
+// CHECK-SPIR:        define {{[a-z_ ]*}}spir_kernel void @_ZTS26rvalue_ref_arg_kernel_name
+// CHECK-SPIR-SAME:     (ptr noundef byval(%class.anon.2) align 4 %ref) #[[SPIR_ATTR0]] {
+// CHECK-SPIR-NEXT:   entry:
+// CHECK-SPIR-NEXT:     %ref.ascast = addrspacecast ptr %ref to ptr addrspace(4)
+// CHECK-SPIR-NEXT:     call spir_func void @_ZZ4mainENKUlT_E0_clIiEEDaS_
+// CHECK-SPIR-SAME:       (ptr addrspace(4) noundef align 4 dereferenceable_or_null(4) %ref.ascast, i32 noundef 42) #[[SPIR_ATTR1]]
+// CHECK-SPIR-NEXT:     ret void
+// CHECK-SPIR-NEXT:   }
+
 // CHECK-AMDGCN: #[[AMDGCN_ATTR0]] = { convergent mustprogress noinline norecurse nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
 // CHECK-AMDGCN: #[[AMDGCN_ATTR1]] = { convergent nounwind }
 //


        


More information about the cfe-commits mailing list