[clang] [llvm] [AMDGPU][Clang] Add check of size for __builtin_amdgcn_global_load_lds (PR #93064)
Shilei Tian via llvm-commits
llvm-commits at lists.llvm.org
Wed May 22 09:51:52 PDT 2024
https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/93064
>From 630adf5665f6e030f924b8da864b139382c30dea Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Wed, 22 May 2024 12:51:42 -0400
Subject: [PATCH] [AMDGPU][Clang] Add check of size for
__builtin_amdgcn_global_load_lds
---
clang/lib/CodeGen/CGBuiltin.cpp | 101 +++++++++++-------
.../SemaOpenCL/builtins-amdgcn-gfx940-err.cl | 9 ++
llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 22 ++--
3 files changed, 83 insertions(+), 49 deletions(-)
create mode 100644 clang/test/SemaOpenCL/builtins-amdgcn-gfx940-err.cl
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index ba94bf89e4751..b39b9d4f0ae85 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -2537,6 +2537,47 @@ static RValue EmitHipStdParUnsupportedBuiltin(CodeGenFunction *CGF,
return RValue::get(CGF->Builder.CreateCall(UBF, Args));
}
+static void buildInstrinsicCallArgs(CodeGenFunction &CGF, const CallExpr *E,
+ unsigned BuiltinID,
+ Function *Callee,
+ SmallVectorImpl<Value *> &Args) {
+ // Query the builtin's signature to learn which arguments are required to
+ // be integer constant expressions; the answer lands in ICEArguments.
+ unsigned ICEArguments = 0;
+ ASTContext::GetBuiltinTypeError Error;
+ CGF.getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
+ assert(Error == ASTContext::GE_None && "Should not codegen an error");
+
+ llvm::FunctionType *FTy = Callee->getFunctionType();
+
+ for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
+ Value *ArgValue = CGF.EmitScalarOrConstFoldImmArg(ICEArguments, i, E);
+ // If the intrinsic parameter type differs from the emitted argument type,
+ // cast it: addrspacecast first if needed, then an AMX tile cast or bitcast.
+ llvm::Type *PTy = FTy->getParamType(i);
+ if (PTy != ArgValue->getType()) {
+ // XXX - vector of pointers? Only plain pointer parameters are checked.
+ if (auto *PtrTy = dyn_cast<llvm::PointerType>(PTy)) {
+ if (PtrTy->getAddressSpace() !=
+ ArgValue->getType()->getPointerAddressSpace()) {
+ ArgValue = CGF.Builder.CreateAddrSpaceCast(
+ ArgValue, llvm::PointerType::get(CGF.getLLVMContext(),
+ PtrTy->getAddressSpace()));
+ }
+ }
+ // Cast vector type (e.g., v256i32) to x86_amx; this only happens
+ // in AMX intrinsics. Every other mismatch falls back to a bitcast.
+ if (PTy->isX86_AMXTy())
+ ArgValue =
+ CGF.Builder.CreateIntrinsic(Intrinsic::x86_cast_vector_to_tile,
+ {ArgValue->getType()}, {ArgValue});
+ else
+ ArgValue = CGF.Builder.CreateBitCast(ArgValue, PTy);
+ }
+ Args.push_back(ArgValue);
+ }
+}
+
RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
const CallExpr *E,
ReturnValueSlot ReturnValue) {
@@ -6024,44 +6065,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
if (IntrinsicID != Intrinsic::not_intrinsic) {
SmallVector<Value*, 16> Args;
-
- // Find out if any arguments are required to be integer constant
- // expressions.
- unsigned ICEArguments = 0;
- ASTContext::GetBuiltinTypeError Error;
- getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
- assert(Error == ASTContext::GE_None && "Should not codegen an error");
-
Function *F = CGM.getIntrinsic(IntrinsicID);
- llvm::FunctionType *FTy = F->getFunctionType();
-
- for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
- Value *ArgValue = EmitScalarOrConstFoldImmArg(ICEArguments, i, E);
- // If the intrinsic arg type is different from the builtin arg type
- // we need to do a bit cast.
- llvm::Type *PTy = FTy->getParamType(i);
- if (PTy != ArgValue->getType()) {
- // XXX - vector of pointers?
- if (auto *PtrTy = dyn_cast<llvm::PointerType>(PTy)) {
- if (PtrTy->getAddressSpace() !=
- ArgValue->getType()->getPointerAddressSpace()) {
- ArgValue = Builder.CreateAddrSpaceCast(
- ArgValue, llvm::PointerType::get(getLLVMContext(),
- PtrTy->getAddressSpace()));
- }
- }
-
- // Cast vector type (e.g., v256i32) to x86_amx, this only happen
- // in amx intrinsics.
- if (PTy->isX86_AMXTy())
- ArgValue = Builder.CreateIntrinsic(Intrinsic::x86_cast_vector_to_tile,
- {ArgValue->getType()}, {ArgValue});
- else
- ArgValue = Builder.CreateBitCast(ArgValue, PTy);
- }
-
- Args.push_back(ArgValue);
- }
+ buildInstrinsicCallArgs(*this, E, BuiltinID, F, Args);
Value *V = Builder.CreateCall(F, Args);
QualType BuiltinRetType = E->getType();
@@ -19040,6 +19045,28 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
CGM.getIntrinsic(Intrinsic::amdgcn_s_sendmsg_rtn, {ResultType});
return Builder.CreateCall(F, {Arg});
}
+ case AMDGPU::BI__builtin_amdgcn_global_load_lds: {
+ SmallVector<Value *, 5> Args;
+ Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_global_load_lds);
+ buildInstrinsicCallArgs(*this, E, BuiltinID, F, Args);
+ constexpr const int SizeIdx = 2;
+ ConstantInt *SizeVal = dyn_cast<ConstantInt>(Args[SizeIdx]);
+ if (!SizeVal) {
+ CGM.Error(E->getExprLoc(), "size must be a constant");
+ return nullptr;
+ }
+ uint64_t Size = SizeVal->getZExtValue();
+ switch (Size) {
+ default:
+ CGM.Error(E->getExprLoc(), "size must be a 1/2/4");
+ return nullptr;
+ case 1:
+ case 2:
+ case 4:
+ break;
+ }
+ return Builder.CreateCall(F, Args);
+ }
default:
return nullptr;
}
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-gfx940-err.cl b/clang/test/SemaOpenCL/builtins-amdgcn-gfx940-err.cl
new file mode 100644
index 0000000000000..96df07ebf96b6
--- /dev/null
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-gfx940-err.cl
@@ -0,0 +1,9 @@
+// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -triple amdgcn-unknown-unknown -target-cpu gfx940 -S -verify -o - %s
+// REQUIRES: amdgpu-registered-target
+
+typedef unsigned int u32;
+
+void test_global_load_lds_unsupported_size(global u32* src, local u32 *dst, u32 size) {
+ __builtin_amdgcn_global_load_lds(src, dst, size, /*offset=*/0, /*aux=*/0); // expected-error{{size must be a constant}} expected-error{{cannot compile this builtin function yet}}
+ __builtin_amdgcn_global_load_lds(src, dst, /*size=*/5, /*offset=*/0, /*aux=*/0); // expected-error {{size must be a 1/2/4}} expected-error{{cannot compile this builtin function yet}}
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 0b774b724d0c0..82e3ecd268190 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -2466,20 +2466,18 @@ def int_amdgcn_perm :
// GFX9 Intrinsics
//===----------------------------------------------------------------------===//
-class AMDGPUGlobalLoadLDS :
- ClangBuiltin<"__builtin_amdgcn_global_load_lds">,
- Intrinsic <
- [],
- [LLVMQualPointerType<1>, // Base global pointer to load from
- LLVMQualPointerType<3>, // LDS base pointer to store to
- llvm_i32_ty, // Data byte size: 1/2/4 (/12/16 for gfx950)
- llvm_i32_ty, // imm offset (applied to both global and LDS address)
- llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = sc0,
+class AMDGPUGlobalLoadLDS : Intrinsic <
+ [],
+ [LLVMQualPointerType<1>, // Base global pointer to load from
+ LLVMQualPointerType<3>, // LDS base pointer to store to
+ llvm_i32_ty, // Data byte size: 1/2/4
+ llvm_i32_ty, // imm offset (applied to both global and LDS address)
+ llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = sc0,
// bit 1 = sc1,
// bit 4 = scc))
- [IntrWillReturn, NoCapture<ArgIndex<0>>, NoCapture<ArgIndex<1>>,
- ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, IntrNoCallback, IntrNoFree],
- "", [SDNPMemOperand]>;
+ [IntrWillReturn, NoCapture<ArgIndex<0>>, NoCapture<ArgIndex<1>>,
+ ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, IntrNoCallback, IntrNoFree],
+ "", [SDNPMemOperand]>;
def int_amdgcn_global_load_lds : AMDGPUGlobalLoadLDS;
//===----------------------------------------------------------------------===//
More information about the llvm-commits
mailing list