[clang] [CIR][AMDGPU] Add AMDGPU-specific function attributes for HIP kernels (PR #188007)
via cfe-commits
cfe-commits at lists.llvm.org
Mon Mar 23 03:01:32 PDT 2026
https://github.com/skc7 created https://github.com/llvm/llvm-project/pull/188007
Upstreaming clangIR PR: https://github.com/llvm/clangir/pull/2091
This patch adds support for AMDGPU-specific function attributes for HIP kernels.
Added setTargetAttributes to AMDGPUTargetCIRGenInfo to set kernel attributes.
Added a generic string-attribute handler in amendFunction to translate string-valued function attributes carrying the "cir." prefix into LLVM function attributes.
Follows OGCG AMDGPU implementation from "clang/lib/CodeGen/Targets/AMDGPU.cpp".
>From 8e53f91820aa1158951de2ae7beac94fcaed0545 Mon Sep 17 00:00:00 2001
From: skc7 <Krishna.Sankisa at amd.com>
Date: Mon, 23 Mar 2026 15:20:23 +0530
Subject: [PATCH] [CIR][AMDGPU] Add AMDGPU-specific function attributes for HIP
kernels
---
clang/lib/CIR/CodeGen/CIRGenModule.cpp | 14 +-
clang/lib/CIR/CodeGen/CMakeLists.txt | 1 +
clang/lib/CIR/CodeGen/TargetInfo.cpp | 10 +
clang/lib/CIR/CodeGen/TargetInfo.h | 5 +
clang/lib/CIR/CodeGen/Targets/AMDGPU.cpp | 256 ++++++++++++++++++
.../Lowering/DirectToLLVM/LowerToLLVMIR.cpp | 27 +-
clang/test/CIR/CodeGenHIP/amdgpu-attrs.hip | 82 ++++++
7 files changed, 386 insertions(+), 9 deletions(-)
create mode 100644 clang/lib/CIR/CodeGen/Targets/AMDGPU.cpp
create mode 100644 clang/test/CIR/CodeGenHIP/amdgpu-attrs.hip
diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
index f3ab733bf4c6a..4be669777bb26 100644
--- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
@@ -669,7 +669,7 @@ void CIRGenModule::setNonAliasAttributes(GlobalDecl gd, mlir::Operation *op) {
assert(!cir::MissingFeatures::opFuncCPUAndFeaturesAttributes());
assert(!cir::MissingFeatures::opFuncSection());
- assert(!cir::MissingFeatures::setTargetAttributes());
+ getTargetCIRGenInfo().setTargetAttributes(gd.getDecl(), op, *this);
}
std::optional<cir::SourceLanguage> CIRGenModule::getCIRSourceLanguage() const {
@@ -2557,12 +2557,15 @@ void CIRGenModule::setFunctionAttributes(GlobalDecl globalDecl,
// represent them in dedicated ops. The correct attributes are ensured during
// translation to LLVM. Thus, we don't need to check for them here.
+ const auto *funcDecl = cast<FunctionDecl>(globalDecl.getDecl());
+
if (!isIncompleteFunction)
setCIRFunctionAttributes(globalDecl,
getTypes().arrangeGlobalDeclaration(globalDecl),
func, isThunk);
- assert(!cir::MissingFeatures::setTargetAttributes());
+ if (!isIncompleteFunction && func.isDeclaration())
+ getTargetCIRGenInfo().setTargetAttributes(funcDecl, func, *this);
// TODO(cir): This needs a lot of work to better match CodeGen. That
// ultimately ends up in setGlobalVisibility, which already has the linkage of
@@ -2574,17 +2577,16 @@ void CIRGenModule::setFunctionAttributes(GlobalDecl globalDecl,
}
// If we plan on emitting this inline builtin, we can't treat it as a builtin.
- const auto *fd = cast<FunctionDecl>(globalDecl.getDecl());
- if (fd->isInlineBuiltinDeclaration()) {
+ if (funcDecl->isInlineBuiltinDeclaration()) {
const FunctionDecl *fdBody;
- bool hasBody = fd->hasBody(fdBody);
+ bool hasBody = funcDecl->hasBody(fdBody);
(void)hasBody;
assert(hasBody && "Inline builtin declarations should always have an "
"available body!");
assert(!cir::MissingFeatures::attributeNoBuiltin());
}
- if (fd->isReplaceableGlobalAllocationFunction()) {
+ if (funcDecl->isReplaceableGlobalAllocationFunction()) {
// A replaceable global allocation function does not act like a builtin by
// default, only if it is invoked by a new-expression or delete-expression.
func->setAttr(cir::CIRDialect::getNoBuiltinAttrName(),
diff --git a/clang/lib/CIR/CodeGen/CMakeLists.txt b/clang/lib/CIR/CodeGen/CMakeLists.txt
index 8548cc8424527..9b8fdf551ef10 100644
--- a/clang/lib/CIR/CodeGen/CMakeLists.txt
+++ b/clang/lib/CIR/CodeGen/CMakeLists.txt
@@ -51,6 +51,7 @@ add_clang_library(clangCIR
CIRGenTypes.cpp
CIRGenVTables.cpp
TargetInfo.cpp
+ Targets/AMDGPU.cpp
DEPENDS
MLIRCIR
diff --git a/clang/lib/CIR/CodeGen/TargetInfo.cpp b/clang/lib/CIR/CodeGen/TargetInfo.cpp
index ee68d9c329b83..3859588c5cfaf 100644
--- a/clang/lib/CIR/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CIR/CodeGen/TargetInfo.cpp
@@ -1,6 +1,7 @@
#include "TargetInfo.h"
#include "ABIInfo.h"
#include "CIRGenFunction.h"
+#include "CIRGenModule.h"
#include "mlir/Dialect/Ptr/IR/MemorySpaceInterfaces.h"
#include "clang/CIR/Dialect/IR/CIRAttrs.h"
#include "clang/CIR/Dialect/IR/CIRDialect.h"
@@ -53,6 +54,15 @@ class AMDGPUTargetCIRGenInfo : public TargetCIRGenInfo {
public:
AMDGPUTargetCIRGenInfo(CIRGenTypes &cgt)
: TargetCIRGenInfo(std::make_unique<AMDGPUABIInfo>(cgt)) {}
+
+ void setTargetAttributes(const clang::Decl *decl, mlir::Operation *global,
+ CIRGenModule &cgm) const override {
+ auto func = mlir::dyn_cast<cir::FuncOp>(global);
+ if (!func)
+ return;
+
+ setAMDGPUTargetFunctionAttributes(decl, func, cgm);
+ }
};
} // namespace
diff --git a/clang/lib/CIR/CodeGen/TargetInfo.h b/clang/lib/CIR/CodeGen/TargetInfo.h
index b397d8cd7fab8..868af0e8343fb 100644
--- a/clang/lib/CIR/CodeGen/TargetInfo.h
+++ b/clang/lib/CIR/CodeGen/TargetInfo.h
@@ -19,6 +19,7 @@
#include "mlir/Dialect/Ptr/IR/MemorySpaceInterfaces.h"
#include "clang/Basic/AddressSpaces.h"
#include "clang/CIR/Dialect/IR/CIRAttrs.h"
+#include "clang/CIR/Dialect/IR/CIRDialect.h"
#include "clang/CIR/Dialect/IR/CIROpsEnums.h"
#include <memory>
@@ -135,6 +136,10 @@ class TargetCIRGenInfo {
std::unique_ptr<TargetCIRGenInfo>
createAMDGPUTargetCIRGenInfo(CIRGenTypes &cgt);
+/// Set AMDGPU-specific function attributes for HIP kernels.
+void setAMDGPUTargetFunctionAttributes(const clang::Decl *decl,
+ cir::FuncOp func, CIRGenModule &cgm);
+
std::unique_ptr<TargetCIRGenInfo> createX8664TargetCIRGenInfo(CIRGenTypes &cgt);
std::unique_ptr<TargetCIRGenInfo> createNVPTXTargetCIRGenInfo(CIRGenTypes &cgt);
diff --git a/clang/lib/CIR/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CIR/CodeGen/Targets/AMDGPU.cpp
new file mode 100644
index 0000000000000..280cb6ae5865c
--- /dev/null
+++ b/clang/lib/CIR/CodeGen/Targets/AMDGPU.cpp
@@ -0,0 +1,256 @@
+//===---- AMDGPU.cpp - AMDGPU-specific CIR CodeGen ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This provides AMDGPU-specific CIR CodeGen logic for function attributes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../CIRGenModule.h"
+#include "../TargetInfo.h"
+
+#include "clang/AST/Attr.h"
+#include "clang/AST/Decl.h"
+#include "clang/Basic/TargetInfo.h"
+#include "clang/CIR/Dialect/IR/CIRDialect.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace clang;
+using namespace clang::CIRGen;
+
+namespace {
+
+/// Check if AMDGPU protected visibility is required.
+static bool requiresAMDGPUProtectedVisibility(const clang::Decl *d,
+ cir::FuncOp func) {
+ if (func.getGlobalVisibility() != cir::VisibilityKind::Hidden)
+ return false;
+
+ if (d->hasAttr<OMPDeclareTargetDeclAttr>())
+ return false;
+
+ return d->hasAttr<DeviceKernelAttr>() ||
+ (clang::isa<clang::FunctionDecl>(d) && d->hasAttr<CUDAGlobalAttr>());
+}
+
+/// Handle amdgpu-flat-work-group-size attribute.
+static void handleAMDGPUFlatWorkGroupSizeAttr(const clang::FunctionDecl *fd,
+ cir::FuncOp func,
+ CIRGenModule &cgm,
+ bool isOpenCLKernel) {
+ auto &builder = cgm.getBuilder();
+ const auto *flatWGS = fd->getAttr<AMDGPUFlatWorkGroupSizeAttr>();
+ const auto *reqdWGS =
+ cgm.getLangOpts().OpenCL ? fd->getAttr<ReqdWorkGroupSizeAttr>() : nullptr;
+
+ if (flatWGS || reqdWGS) {
+ unsigned min = 0, max = 0;
+ if (flatWGS) {
+ min = flatWGS->getMin()
+ ->EvaluateKnownConstInt(cgm.getASTContext())
+ .getExtValue();
+ max = flatWGS->getMax()
+ ->EvaluateKnownConstInt(cgm.getASTContext())
+ .getExtValue();
+ }
+ if (reqdWGS && min == 0 && max == 0) {
+ min = max = reqdWGS->getXDim()
+ ->EvaluateKnownConstInt(cgm.getASTContext())
+ .getExtValue() *
+ reqdWGS->getYDim()
+ ->EvaluateKnownConstInt(cgm.getASTContext())
+ .getExtValue() *
+ reqdWGS->getZDim()
+ ->EvaluateKnownConstInt(cgm.getASTContext())
+ .getExtValue();
+ }
+ if (min != 0) {
+ assert(min <= max && "Min must be less than or equal Max");
+ std::string attrVal = llvm::utostr(min) + "," + llvm::utostr(max);
+ func->setAttr("cir.amdgpu-flat-work-group-size",
+ builder.getStringAttr(attrVal));
+ } else {
+ assert(max == 0 && "Max must be zero");
+ }
+ } else {
+ const unsigned defaultMax =
+ isOpenCLKernel ? 256 : cgm.getLangOpts().GPUMaxThreadsPerBlock;
+ std::string attrVal = std::string("1,") + llvm::utostr(defaultMax);
+ func->setAttr("cir.amdgpu-flat-work-group-size",
+ builder.getStringAttr(attrVal));
+ }
+}
+
+/// Handle amdgpu-waves-per-eu attribute.
+static void handleAMDGPUWavesPerEUAttr(const clang::FunctionDecl *fd,
+ cir::FuncOp func, CIRGenModule &cgm) {
+ const auto *attr = fd->getAttr<AMDGPUWavesPerEUAttr>();
+ if (!attr)
+ return;
+
+ auto &builder = cgm.getBuilder();
+ unsigned min =
+ attr->getMin()->EvaluateKnownConstInt(cgm.getASTContext()).getExtValue();
+ unsigned max = attr->getMax()
+ ? attr->getMax()
+ ->EvaluateKnownConstInt(cgm.getASTContext())
+ .getExtValue()
+ : 0;
+
+ if (min != 0) {
+ assert((max == 0 || min <= max) && "Min must be less than or equal Max");
+ std::string attrVal = llvm::utostr(min);
+ if (max != 0)
+ attrVal = attrVal + "," + llvm::utostr(max);
+ func->setAttr("cir.amdgpu-waves-per-eu", builder.getStringAttr(attrVal));
+ } else {
+ assert(max == 0 && "Max must be zero");
+ }
+}
+
+/// Handle amdgpu-num-sgpr attribute.
+static void handleAMDGPUNumSGPRAttr(const clang::FunctionDecl *fd,
+ cir::FuncOp func, CIRGenModule &cgm) {
+ const auto *attr = fd->getAttr<AMDGPUNumSGPRAttr>();
+ if (!attr)
+ return;
+
+ uint32_t numSGPR = attr->getNumSGPR();
+ if (numSGPR != 0) {
+ auto &builder = cgm.getBuilder();
+ func->setAttr("cir.amdgpu-num-sgpr",
+ builder.getStringAttr(llvm::utostr(numSGPR)));
+ }
+}
+
+/// Handle amdgpu-num-vgpr attribute.
+static void handleAMDGPUNumVGPRAttr(const clang::FunctionDecl *fd,
+ cir::FuncOp func, CIRGenModule &cgm) {
+ const auto *attr = fd->getAttr<AMDGPUNumVGPRAttr>();
+ if (!attr)
+ return;
+
+ uint32_t numVGPR = attr->getNumVGPR();
+ if (numVGPR != 0) {
+ auto &builder = cgm.getBuilder();
+ func->setAttr("cir.amdgpu-num-vgpr",
+ builder.getStringAttr(llvm::utostr(numVGPR)));
+ }
+}
+
+/// Handle amdgpu-max-num-workgroups attribute.
+static void handleAMDGPUMaxNumWorkGroupsAttr(const clang::FunctionDecl *fd,
+ cir::FuncOp func,
+ CIRGenModule &cgm) {
+ const auto *attr = fd->getAttr<AMDGPUMaxNumWorkGroupsAttr>();
+ if (!attr)
+ return;
+
+ auto &builder = cgm.getBuilder();
+ uint32_t x = attr->getMaxNumWorkGroupsX()
+ ->EvaluateKnownConstInt(cgm.getASTContext())
+ .getExtValue();
+ uint32_t y = attr->getMaxNumWorkGroupsY()
+ ? attr->getMaxNumWorkGroupsY()
+ ->EvaluateKnownConstInt(cgm.getASTContext())
+ .getExtValue()
+ : 1;
+ uint32_t z = attr->getMaxNumWorkGroupsZ()
+ ? attr->getMaxNumWorkGroupsZ()
+ ->EvaluateKnownConstInt(cgm.getASTContext())
+ .getExtValue()
+ : 1;
+
+ llvm::SmallString<32> attrVal;
+ llvm::raw_svector_ostream os(attrVal);
+ os << x << ',' << y << ',' << z;
+ func->setAttr("cir.amdgpu-max-num-workgroups",
+ builder.getStringAttr(attrVal.str()));
+}
+
+/// Handle amdgpu-cluster-dims attribute.
+static void handleAMDGPUClusterDimsAttr(const clang::FunctionDecl *fd,
+ cir::FuncOp func, CIRGenModule &cgm,
+ bool isOpenCLKernel) {
+ auto &builder = cgm.getBuilder();
+
+ if (const auto *attr = fd->getAttr<CUDAClusterDimsAttr>()) {
+ auto getExprVal = [&](const Expr *e) {
+ return e ? e->EvaluateKnownConstInt(cgm.getASTContext()).getExtValue()
+ : 1;
+ };
+ unsigned x = getExprVal(attr->getX());
+ unsigned y = getExprVal(attr->getY());
+ unsigned z = getExprVal(attr->getZ());
+
+ llvm::SmallString<32> attrVal;
+ llvm::raw_svector_ostream os(attrVal);
+ os << x << ',' << y << ',' << z;
+ func->setAttr("cir.amdgpu-cluster-dims",
+ builder.getStringAttr(attrVal.str()));
+ }
+
+ const clang::TargetInfo &targetInfo = cgm.getASTContext().getTargetInfo();
+ if ((isOpenCLKernel &&
+ targetInfo.hasFeatureEnabled(targetInfo.getTargetOpts().FeatureMap,
+ "clusters")) ||
+ fd->hasAttr<CUDANoClusterAttr>()) {
+ func->setAttr("cir.amdgpu-cluster-dims", builder.getStringAttr("0,0,0"));
+ }
+}
+
+/// Handle amdgpu-ieee attribute.
+static void handleAMDGPUIEEEAttr(cir::FuncOp func, CIRGenModule &cgm) {
+ if (!cgm.getCodeGenOpts().EmitIEEENaNCompliantInsts) {
+ auto &builder = cgm.getBuilder();
+ func->setAttr("cir.amdgpu-ieee", builder.getStringAttr("false"));
+ }
+}
+
+} // anonymous namespace
+
+void clang::CIRGen::setAMDGPUTargetFunctionAttributes(const clang::Decl *decl,
+ cir::FuncOp func,
+ CIRGenModule &cgm) {
+ const auto *fd = clang::dyn_cast_or_null<clang::FunctionDecl>(decl);
+ if (!fd)
+ return;
+
+ if (func.isDeclaration())
+ return;
+
+ // Set protected visibility for AMDGPU kernels
+ if (requiresAMDGPUProtectedVisibility(decl, func)) {
+ func.setGlobalVisibility(cir::VisibilityKind::Protected);
+ func.setDSOLocal(true);
+ }
+
+ const bool isOpenCLKernel =
+ cgm.getLangOpts().OpenCL && fd->hasAttr<DeviceKernelAttr>();
+ const bool isHIPKernel =
+ cgm.getLangOpts().HIP && fd->hasAttr<CUDAGlobalAttr>();
+
+ if (!isOpenCLKernel && !isHIPKernel)
+ return;
+
+ // Set HIP kernel calling convention
+ if (isHIPKernel) {
+ // TODO(CIR) : Add amdgpu calling conv.
+ func.setVisibility(mlir::SymbolTable::Visibility::Public);
+ func.setLinkageAttr(cir::GlobalLinkageKindAttr::get(
+ func.getContext(), cir::GlobalLinkageKind::ExternalLinkage));
+ }
+
+ handleAMDGPUFlatWorkGroupSizeAttr(fd, func, cgm, isOpenCLKernel);
+ handleAMDGPUWavesPerEUAttr(fd, func, cgm);
+ handleAMDGPUNumSGPRAttr(fd, func, cgm);
+ handleAMDGPUNumVGPRAttr(fd, func, cgm);
+ handleAMDGPUMaxNumWorkGroupsAttr(fd, func, cgm);
+ handleAMDGPUClusterDimsAttr(fd, func, cgm, isOpenCLKernel);
+ handleAMDGPUIEEEAttr(func, cgm);
+}
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVMIR.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVMIR.cpp
index 2a95cfb9371b1..dbedbb5647aa5 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVMIR.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVMIR.cpp
@@ -15,9 +15,7 @@
#include "mlir/IR/DialectRegistry.h"
#include "mlir/Target/LLVMIR/LLVMTranslationInterface.h"
#include "mlir/Target/LLVMIR/ModuleTranslation.h"
-#include "clang/CIR/Dialect/IR/CIRAttrs.h"
#include "clang/CIR/Dialect/IR/CIRDialect.h"
-#include "clang/CIR/MissingFeatures.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/GlobalVariable.h"
@@ -54,7 +52,11 @@ class CIRDialectLLVMIRTranslationInterface
mlir::Operation *op, llvm::ArrayRef<llvm::Instruction *> instructions,
mlir::NamedAttribute attribute,
mlir::LLVM::ModuleTranslation &moduleTranslation) const override {
- if (auto mod = dyn_cast<mlir::ModuleOp>(op)) {
+ if (auto func = dyn_cast<mlir::LLVM::LLVMFuncOp>(op)) {
+ if (mlir::failed(
+ amendFunction(func, instructions, attribute, moduleTranslation)))
+ return mlir::failure();
+ } else if (auto mod = dyn_cast<mlir::ModuleOp>(op)) {
if (mlir::failed(amendModule(mod, attribute, moduleTranslation)))
return mlir::failure();
}
@@ -62,6 +64,25 @@ class CIRDialectLLVMIRTranslationInterface
}
private:
+ // Translate CIR function attributes to LLVM function attributes.
+ mlir::LogicalResult
+ amendFunction(mlir::LLVM::LLVMFuncOp func,
+ llvm::ArrayRef<llvm::Instruction *> instructions,
+ mlir::NamedAttribute attribute,
+ mlir::LLVM::ModuleTranslation &moduleTranslation) const {
+ llvm::Function *llvmFunc = moduleTranslation.lookupFunction(func.getName());
+ llvm::StringRef attrName = attribute.getName().strref();
+
+ // Strip the "cir." prefix to get the LLVM attribute name.
+ llvm::StringRef llvmAttrName = attrName.substr(strlen("cir."));
+ if (auto strAttr = mlir::dyn_cast<mlir::StringAttr>(attribute.getValue())) {
+ llvmFunc->addFnAttr(llvmAttrName, strAttr.getValue());
+ return mlir::success();
+ }
+
+ return mlir::success();
+ }
+
// Translate CIR's module attributes to LLVM's module metadata
mlir::LogicalResult
amendModule(mlir::ModuleOp mod, mlir::NamedAttribute attribute,
diff --git a/clang/test/CIR/CodeGenHIP/amdgpu-attrs.hip b/clang/test/CIR/CodeGenHIP/amdgpu-attrs.hip
new file mode 100644
index 0000000000000..5a15f62899cf8
--- /dev/null
+++ b/clang/test/CIR/CodeGenHIP/amdgpu-attrs.hip
@@ -0,0 +1,82 @@
+#include "../CodeGenCUDA/Inputs/cuda.h"
+
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip -fclangir \
+// RUN: -fcuda-is-device -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR %s --input-file=%t.cir
+
+// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip -fclangir \
+// RUN: -fcuda-is-device -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM %s --input-file=%t.ll
+
+// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip \
+// RUN: -fcuda-is-device -emit-llvm %s -o %t.ogcg.ll
+// RUN: FileCheck --check-prefix=LLVM %s --input-file=%t.ogcg.ll
+
+// Test that AMDGPU-specific attributes are generated for HIP kernels
+
+// Test: Default attributes for simple kernel
+// CIR: cir.func{{.*}} @_Z13kernel_simplev(){{.*}}"cir.amdgpu-flat-work-group-size" = "1,1024"
+// LLVM: define{{.*}} void @_Z13kernel_simplev(){{.*}} #[[SIMPLE_ATTR:[0-9]+]]
+__global__ void kernel_simple() {}
+
+// Test: Explicit flat work group size attribute
+// CIR: cir.func{{.*}} @_Z21kernel_flat_wg_size_1v(){{.*}}"cir.amdgpu-flat-work-group-size" = "64,128"
+// LLVM: define{{.*}} void @_Z21kernel_flat_wg_size_1v(){{.*}} #[[FLAT_WG_ATTR:[0-9]+]]
+__attribute__((amdgpu_flat_work_group_size(64, 128)))
+__global__ void kernel_flat_wg_size_1() {}
+
+// Test: Waves per EU attribute
+// CIR: cir.func{{.*}} @_Z19kernel_waves_per_euv(){{.*}}"cir.amdgpu-waves-per-eu" = "2"
+// LLVM: define{{.*}} void @_Z19kernel_waves_per_euv(){{.*}} #[[WAVES_ATTR:[0-9]+]]
+__attribute__((amdgpu_waves_per_eu(2)))
+__global__ void kernel_waves_per_eu() {}
+
+// Test: Waves per EU with min and max
+// CIR: cir.func{{.*}} @_Z22kernel_waves_per_eu_mmv(){{.*}}"cir.amdgpu-waves-per-eu" = "2,4"
+// LLVM: define{{.*}} void @_Z22kernel_waves_per_eu_mmv(){{.*}} #[[WAVES_MM_ATTR:[0-9]+]]
+__attribute__((amdgpu_waves_per_eu(2, 4)))
+__global__ void kernel_waves_per_eu_mm() {}
+
+// Test: Num SGPR attribute
+// CIR: cir.func{{.*}} @_Z15kernel_num_sgprv(){{.*}}"cir.amdgpu-num-sgpr" = "32"
+// LLVM: define{{.*}} void @_Z15kernel_num_sgprv(){{.*}} #[[SGPR_ATTR:[0-9]+]]
+__attribute__((amdgpu_num_sgpr(32)))
+__global__ void kernel_num_sgpr() {}
+
+// Test: Num VGPR attribute
+// CIR: cir.func{{.*}} @_Z15kernel_num_vgprv(){{.*}}"cir.amdgpu-num-vgpr" = "64"
+// LLVM: define{{.*}} void @_Z15kernel_num_vgprv(){{.*}} #[[VGPR_ATTR:[0-9]+]]
+__attribute__((amdgpu_num_vgpr(64)))
+__global__ void kernel_num_vgpr() {}
+
+// Test: Max num workgroups attribute
+// CIR: cir.func{{.*}} @_Z22kernel_max_num_wgroupsv(){{.*}}"cir.amdgpu-max-num-workgroups" = "8,4,2"
+// LLVM: define{{.*}} void @_Z22kernel_max_num_wgroupsv(){{.*}} #[[MAX_WG_ATTR:[0-9]+]]
+__attribute__((amdgpu_max_num_work_groups(8, 4, 2)))
+__global__ void kernel_max_num_wgroups() {}
+
+// Test: Combined attributes
+// CIR: cir.func{{.*}} @_Z15kernel_combinedv(){{.*}}"cir.amdgpu-flat-work-group-size" = "256,256"{{.*}}"cir.amdgpu-num-sgpr" = "48"{{.*}}"cir.amdgpu-num-vgpr" = "32"{{.*}}"cir.amdgpu-waves-per-eu" = "1,2"
+// LLVM: define{{.*}} void @_Z15kernel_combinedv(){{.*}} #[[COMBINED_ATTR:[0-9]+]]
+__attribute__((amdgpu_flat_work_group_size(256, 256)))
+__attribute__((amdgpu_waves_per_eu(1, 2)))
+__attribute__((amdgpu_num_sgpr(48)))
+__attribute__((amdgpu_num_vgpr(32)))
+__global__ void kernel_combined() {}
+
+// Test: Device function should NOT have kernel attributes
+// CIR: cir.func{{.*}} @_Z9device_fnv()
+// CIR-NOT: cir.amdgpu-flat-work-group-size
+// LLVM: define{{.*}} void @_Z9device_fnv()
+__device__ void device_fn() {}
+
+// Verify LLVM attributes
+// LLVM-DAG: attributes #[[SIMPLE_ATTR]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024"
+// LLVM-DAG: attributes #[[FLAT_WG_ATTR]] = {{.*}}"amdgpu-flat-work-group-size"="64,128"
+// LLVM-DAG: attributes #[[WAVES_ATTR]] = {{.*}}"amdgpu-waves-per-eu"="2"
+// LLVM-DAG: attributes #[[WAVES_MM_ATTR]] = {{.*}}"amdgpu-waves-per-eu"="2,4"
+// LLVM-DAG: attributes #[[SGPR_ATTR]] = {{.*}}"amdgpu-num-sgpr"="32"
+// LLVM-DAG: attributes #[[VGPR_ATTR]] = {{.*}}"amdgpu-num-vgpr"="64"
+// LLVM-DAG: attributes #[[MAX_WG_ATTR]] = {{.*}}"amdgpu-max-num-workgroups"="8,4,2"
+// LLVM-DAG: attributes #[[COMBINED_ATTR]] = {{.*}}"amdgpu-flat-work-group-size"="256,256"{{.*}}"amdgpu-num-sgpr"="48"{{.*}}"amdgpu-num-vgpr"="32"{{.*}}"amdgpu-waves-per-eu"="1,2"
More information about the cfe-commits
mailing list