[clang] [CIR][HIP] Add Stub body emission test coverage and Fix kernelHandle storage (PR #179823)
David Rivera via cfe-commits
cfe-commits at lists.llvm.org
Thu Feb 19 09:51:20 PST 2026
https://github.com/RiverDave updated https://github.com/llvm/llvm-project/pull/179823
>From 2a625851b643eb9a4fffa496605e010172cee80e Mon Sep 17 00:00:00 2001
From: David Rivera <davidriverg at gmail.com>
Date: Wed, 4 Feb 2026 19:15:32 -0500
Subject: [PATCH] [CIR][HIP] Add Stub body emission test coverage and Fix
kernelHandle storage
---
clang/lib/CIR/CodeGen/CIRGenCUDANV.cpp | 13 +++++--------
clang/test/CIR/CodeGenCUDA/kernel-call.cu | 15 ++++++++++++---
2 files changed, 17 insertions(+), 11 deletions(-)
diff --git a/clang/lib/CIR/CodeGen/CIRGenCUDANV.cpp b/clang/lib/CIR/CodeGen/CIRGenCUDANV.cpp
index ad5da0d11ff02..451c28c3cccc1 100644
--- a/clang/lib/CIR/CodeGen/CIRGenCUDANV.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenCUDANV.cpp
@@ -121,9 +121,6 @@ void CIRGenNVCUDARuntime::emitDeviceStubBodyNew(CIRGenFunction &cgf,
if (cgm.getLangOpts().OffloadViaLLVM)
cgm.errorNYI("CIRGenNVCUDARuntime: Offload via LLVM");
- if (cgm.getLangOpts().HIP)
- cgm.errorNYI("CIRGenNVCUDARuntime: HIP Support");
-
CIRGenBuilderTy &builder = cgm.getBuilder();
mlir::Location loc = fn.getLoc();
@@ -213,7 +210,8 @@ void CIRGenNVCUDARuntime::emitDeviceStubBodyNew(CIRGenFunction &cgf,
cir::PointerType kernelTy = cir::PointerType::get(globalOp.getSymType());
mlir::Value kernelVal = cir::GetGlobalOp::create(builder, loc, kernelTy,
globalOp.getSymName());
- return kernelVal;
+ mlir::Value func = builder.createBitcast(kernelVal, cgm.voidPtrTy);
+ return func;
}
if (cir::FuncOp funcOp = llvm::dyn_cast_or_null<cir::FuncOp>(
kernelHandles[fn.getSymName()])) {
@@ -325,10 +323,9 @@ mlir::Operation *CIRGenNVCUDARuntime::getKernelHandle(cir::FuncOp fn,
CIRGenBuilderTy &builder = cgm.getBuilder();
StringRef globalName = cgm.getMangledName(
gd.getWithKernelReferenceKind(KernelReferenceKind::Kernel));
- const VarDecl *varDecl = llvm::dyn_cast_or_null<VarDecl>(gd.getDecl());
- cir::GlobalOp globalOp =
- cgm.getOrCreateCIRGlobal(globalName, fn.getFunctionType().getReturnType(),
- LangAS::Default, varDecl, NotForDefinition);
+ cir::GlobalOp globalOp = CIRGenModule::createGlobalOp(
+ cgm, fn.getLoc(), globalName, fn.getFunctionType(),
+ /*isConstant=*/true);
globalOp->setAttr("alignment", builder.getI64IntegerAttr(
cgm.getPointerAlign().getQuantity()));
diff --git a/clang/test/CIR/CodeGenCUDA/kernel-call.cu b/clang/test/CIR/CodeGenCUDA/kernel-call.cu
index ccc33461567bf..05c48625335a6 100644
--- a/clang/test/CIR/CodeGenCUDA/kernel-call.cu
+++ b/clang/test/CIR/CodeGenCUDA/kernel-call.cu
@@ -1,16 +1,19 @@
// Based on clang/test/CodeGenCUDA/kernel-call.cu.
-// Tests device stub body emission for CUDA kernels.
+// Tests device stub body emission for CUDA and HIP kernels.
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -target-sdk-version=9.2 \
// RUN: -emit-cir %s -x cuda -o %t.cir
// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CUDA-NEW
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fhip-new-launch-api \
+// RUN: -x hip -emit-cir %s -o %t.hip.cir
+// RUN: FileCheck --input-file=%t.hip.cir %s --check-prefix=HIP-NEW
+
#include "Inputs/cuda.h"
// TODO: Test CUDA legacy (< 9.0) when legacy stub body is implemented
-// TODO: Test HIP when HIP stub body support is complete
// Check that the stub function is generated with the correct name
// CUDA-NEW-LABEL: cir.func {{.*}} @_Z21__device_stub__kernelif
@@ -48,5 +51,11 @@
// Check cudaLaunchKernel is called with all 6 arguments:
// func ptr, gridDim, blockDim, args, sharedMem, stream
// CUDA-NEW: cir.call @cudaLaunchKernel({{.*}}) : (!cir.ptr<!void>, !rec_dim3, !rec_dim3, !cir.ptr<!cir.ptr<!void>>, !u64i, !cir.ptr<!rec_cudaStream>) -> (!u32i {llvm.noundef})
-
+//
+// HIP-NEW: cir.global constant external @_Z6kernelif = #cir.global_view<@_Z21__device_stub__kernelif> : !cir.func<(!s32i, !cir.float)>
+// HIP-NEW-LABEL: cir.func {{.*}} @_Z21__device_stub__kernelif
+// HIP-NEW: cir.alloca !cir.ptr<!rec_hipStream>, {{.*}} ["stream"]
+// HIP-NEW: cir.call @__hipPopCallConfiguration({{.*}}) : (!cir.ptr<!rec_dim3>, !cir.ptr<!rec_dim3>, !cir.ptr<!u64i>, !cir.ptr<!cir.ptr<!rec_hipStream>>) -> !s32i
+// HIP-NEW: cir.get_global @_Z6kernelif : !cir.ptr<!cir.func<(!s32i, !cir.float)>>
+// HIP-NEW: cir.call @hipLaunchKernel({{.*}}) : (!cir.ptr<!void>, !rec_dim3, !rec_dim3, !cir.ptr<!cir.ptr<!void>>, !u64i, !cir.ptr<!rec_hipStream>) -> (!u32i {llvm.noundef})
__global__ void kernel(int x, float y) {}
More information about the cfe-commits
mailing list