[clang] [llvm] [WIP][OpenMP] Remove dependency on `libffi` from offloading runtime (PR #91264)
via cfe-commits
cfe-commits at lists.llvm.org
Mon May 6 12:58:47 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang-codegen
Author: Joseph Huber (jhuber6)
<details>
<summary>Changes</summary>
Summary:
This patch attempts to remove the dependency on `libffi` by instead
emitting the host / CPU kernels using an aggregate struct made from the
captured context. This callows us to have a fixed function prototype we
can call directly rather than requiring an extra library to decode the
ABI to call a function with N (non variadic) arguments.
NOTE:
This currently fails for tests using a non-constant value for
`num_teams` on the CPU. It seems that these use a method called
`CGF.EmitScalarExpr(NumTeams)` which doesn't seem to work correctly
with the created aggregate struct.
---
Patch is 1.07 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/91264.diff
38 Files Affected:
- (modified) clang/lib/CodeGen/CGOpenMPRuntime.cpp (+6-2)
- (modified) clang/lib/CodeGen/CGStmtOpenMP.cpp (+126)
- (modified) clang/lib/CodeGen/CodeGenFunction.h (+3)
- (modified) clang/test/OpenMP/declare_target_codegen.cpp (+3-3)
- (modified) clang/test/OpenMP/declare_target_link_codegen.cpp (+1-1)
- (modified) clang/test/OpenMP/distribute_codegen.cpp (+90-76)
- (modified) clang/test/OpenMP/distribute_simd_codegen.cpp (+196-160)
- (modified) clang/test/OpenMP/openmp_offload_codegen.cpp (+1-1)
- (modified) clang/test/OpenMP/target_firstprivate_codegen.cpp (+704-644)
- (modified) clang/test/OpenMP/target_ompx_dyn_cgroup_mem_codegen.cpp (+170-102)
- (modified) clang/test/OpenMP/target_parallel_codegen.cpp (+264-210)
- (modified) clang/test/OpenMP/target_parallel_for_codegen.cpp (+306-240)
- (modified) clang/test/OpenMP/target_parallel_for_simd_codegen.cpp (+638-498)
- (modified) clang/test/OpenMP/target_parallel_generic_loop_codegen-2.cpp (+48-28)
- (modified) clang/test/OpenMP/target_parallel_if_codegen.cpp (+178-106)
- (modified) clang/test/OpenMP/target_parallel_num_threads_codegen.cpp (+154-94)
- (modified) clang/test/OpenMP/target_private_codegen.cpp (+361-116)
- (modified) clang/test/OpenMP/target_reduction_codegen.cpp (+290-106)
- (modified) clang/test/OpenMP/target_task_affinity_codegen.cpp (+72-70)
- (modified) clang/test/OpenMP/target_teams_codegen.cpp (+402-298)
- (modified) clang/test/OpenMP/target_teams_distribute_codegen.cpp (+330-256)
- (modified) clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp (+82-60)
- (modified) clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_codegen.cpp (+160-139)
- (modified) clang/test/OpenMP/target_teams_distribute_parallel_for_private_codegen.cpp (+56-41)
- (modified) clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp (+110-80)
- (modified) clang/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_codegen.cpp (+160-139)
- (modified) clang/test/OpenMP/target_teams_distribute_parallel_for_simd_private_codegen.cpp (+56-41)
- (modified) clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp (+652-504)
- (modified) clang/test/OpenMP/target_teams_generic_loop_codegen-1.cpp (+82-60)
- (modified) clang/test/OpenMP/target_teams_generic_loop_private_codegen.cpp (+40-25)
- (modified) clang/test/OpenMP/target_teams_map_codegen.cpp (+170-142)
- (modified) clang/test/OpenMP/target_teams_num_teams_codegen.cpp (+154-94)
- (modified) clang/test/OpenMP/target_teams_thread_limit_codegen.cpp (+164-100)
- (modified) clang/test/OpenMP/teams_codegen.cpp (+104-60)
- (modified) offload/plugins-nextgen/host/CMakeLists.txt (-13)
- (removed) offload/plugins-nextgen/host/dynamic_ffi/ffi.cpp (-75)
- (removed) offload/plugins-nextgen/host/dynamic_ffi/ffi.h (-78)
- (modified) offload/plugins-nextgen/host/src/rtl.cpp (+23-18)
``````````diff
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index e39c7c58d2780e..3cd4bcff2f5852 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -5932,12 +5932,16 @@ void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
CodeGenFunction CGF(CGM, true);
llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
- [&CGF, &D, &CodeGen](StringRef EntryFnName) {
+ [&CGF, &D, &CodeGen, this](StringRef EntryFnName) {
const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
- return CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
+ if (CGM.getLangOpts().OpenMPIsTargetDevice && !isGPU())
+ return CGF.GenerateOpenMPCapturedStmtFunctionAggregate(
+ CS, D.getBeginLoc());
+ else
+ return CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
};
OMPBuilder.emitTargetRegionFunction(EntryInfo, GenerateOutlinedFunction,
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index ef3aa3a8e0dc61..b9d27815a8ae24 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -613,6 +613,102 @@ static llvm::Function *emitOutlinedFunctionPrologue(
return F;
}
+static llvm::Function *emitOutlinedFunctionPrologueAggregate(
+ CodeGenFunction &CGF, FunctionArgList &Args,
+ llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
+ &LocalAddrs,
+ llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
+ &VLASizes,
+ llvm::Value *&CXXThisValue, const CapturedStmt &CS, SourceLocation Loc,
+ StringRef FunctionName) {
+ const CapturedDecl *CD = CS.getCapturedDecl();
+ const RecordDecl *RD = CS.getCapturedRecordDecl();
+
+ CXXThisValue = nullptr;
+ // Build the argument list.
+ CodeGenModule &CGM = CGF.CGM;
+ ASTContext &Ctx = CGM.getContext();
+ Args.append(CD->param_begin(), CD->param_end());
+
+ // Create the function declaration.
+ const CGFunctionInfo &FuncInfo =
+ CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args);
+ llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);
+
+ auto *F =
+ llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
+ FunctionName, &CGM.getModule());
+ CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
+ if (CD->isNothrow())
+ F->setDoesNotThrow();
+ F->setDoesNotRecurse();
+
+ // Generate the function.
+ CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, Loc, Loc);
+ Address ContextAddr = CGF.GetAddrOfLocalVar(CD->getContextParam());
+ llvm::Value *ContextV = CGF.Builder.CreateLoad(ContextAddr);
+ LValue ContextLV = CGF.MakeNaturalAlignAddrLValue(
+ ContextV, CGM.getContext().getTagDeclType(RD));
+ auto I = CS.captures().begin();
+ for (const FieldDecl *FD : RD->fields()) {
+ LValue FieldLV = CGF.EmitLValueForFieldInitialization(ContextLV, FD);
+ // Do not map arguments if we emit function with non-original types.
+ Address LocalAddr = FieldLV.getAddress(CGF);
+ // If we are capturing a pointer by copy we don't need to do anything, just
+ // use the value that we get from the arguments.
+ if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
+ const VarDecl *CurVD = I->getCapturedVar();
+ LocalAddrs.insert({FD, {CurVD, LocalAddr}});
+ ++I;
+ continue;
+ }
+
+ LValue ArgLVal =
+ CGF.MakeAddrLValue(LocalAddr, FD->getType(), AlignmentSource::Decl);
+ if (FD->hasCapturedVLAType()) {
+ llvm::Value *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
+ const VariableArrayType *VAT = FD->getCapturedVLAType();
+ VLASizes.try_emplace(FD, VAT->getSizeExpr(), ExprArg);
+ } else if (I->capturesVariable()) {
+ const VarDecl *Var = I->getCapturedVar();
+ QualType VarTy = Var->getType();
+ Address ArgAddr = ArgLVal.getAddress(CGF);
+ if (ArgLVal.getType()->isLValueReferenceType()) {
+ ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
+ } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
+ assert(ArgLVal.getType()->isPointerType());
+ ArgAddr = CGF.EmitLoadOfPointer(
+ ArgAddr, ArgLVal.getType()->castAs<PointerType>());
+ }
+ LocalAddrs.insert(
+ {FD,
+ {Var, Address(ArgAddr.getBasePointer(), ArgAddr.getElementType(),
+ Ctx.getDeclAlign(Var))}});
+ } else if (I->capturesVariableByCopy()) {
+ assert(!FD->getType()->isAnyPointerType() &&
+ "Not expecting a captured pointer.");
+ const VarDecl *Var = I->getCapturedVar();
+ Address CopyAddr = CGF.CreateMemTemp(FD->getType(), Ctx.getDeclAlign(FD),
+ Var->getName());
+ LValue CopyLVal =
+ CGF.MakeAddrLValue(CopyAddr, FD->getType(), AlignmentSource::Decl);
+
+ RValue ArgRVal = CGF.EmitLoadOfLValue(ArgLVal, I->getLocation());
+ CGF.EmitStoreThroughLValue(ArgRVal, CopyLVal);
+
+ LocalAddrs.insert({FD, {Var, CopyAddr}});
+ } else {
+ // If 'this' is captured, load it into CXXThisValue.
+ assert(I->capturesThis());
+ CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
+ LocalAddrs.insert({FD, {nullptr, ArgLVal.getAddress(CGF)}});
+ }
+ ++I;
+ }
+
+ return F;
+}
+
llvm::Function *
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
SourceLocation Loc) {
@@ -695,6 +791,36 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
return WrapperF;
}
+llvm::Function *CodeGenFunction::GenerateOpenMPCapturedStmtFunctionAggregate(
+ const CapturedStmt &S, SourceLocation Loc) {
+ assert(
+ CapturedStmtInfo &&
+ "CapturedStmtInfo should be set when generating the captured function");
+ const CapturedDecl *CD = S.getCapturedDecl();
+ // Build the argument list.
+ FunctionArgList Args;
+ llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
+ llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
+ StringRef FunctionName = CapturedStmtInfo->getHelperName();
+ llvm::Function *F = emitOutlinedFunctionPrologueAggregate(
+ *this, Args, LocalAddrs, VLASizes, CXXThisValue, S, Loc, FunctionName);
+ CodeGenFunction::OMPPrivateScope LocalScope(*this);
+ for (const auto &LocalAddrPair : LocalAddrs) {
+ if (LocalAddrPair.second.first) {
+ LocalScope.addPrivate(LocalAddrPair.second.first,
+ LocalAddrPair.second.second);
+ }
+ }
+ (void)LocalScope.Privatize();
+ for (const auto &VLASizePair : VLASizes)
+ VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
+ PGO.assignRegionCounters(GlobalDecl(CD), F);
+ CapturedStmtInfo->EmitBody(*this, CD->getBody());
+ (void)LocalScope.ForceCleanup();
+ FinishFunction(CD->getBodyRBrace());
+ return F;
+}
+
//===----------------------------------------------------------------------===//
// OpenMP Directive Emission
//===----------------------------------------------------------------------===//
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index e1e687af6a781b..4ad4b96767f795 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -3639,6 +3639,9 @@ class CodeGenFunction : public CodeGenTypeCache {
Address GenerateCapturedStmtArgument(const CapturedStmt &S);
llvm::Function *GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
SourceLocation Loc);
+ llvm::Function *
+ GenerateOpenMPCapturedStmtFunctionAggregate(const CapturedStmt &S,
+ SourceLocation Loc);
void GenerateOpenMPCapturedVars(const CapturedStmt &S,
SmallVectorImpl<llvm::Value *> &CapturedVars);
void emitOMPSimpleStore(LValue LVal, RValue RVal, QualType RValTy,
diff --git a/clang/test/OpenMP/declare_target_codegen.cpp b/clang/test/OpenMP/declare_target_codegen.cpp
index ba93772ede3e8e..81116c6617b5bd 100644
--- a/clang/test/OpenMP/declare_target_codegen.cpp
+++ b/clang/test/OpenMP/declare_target_codegen.cpp
@@ -150,7 +150,7 @@ int bar() { return 1 + foo() + bar() + baz1() + baz2(); }
int maini1() {
int a;
static long aa = 32 + bbb + ccc + fff + ggg;
-// CHECK-DAG: define weak_odr protected void @__omp_offloading_{{.*}}maini1{{.*}}_l[[@LINE+1]](ptr {{.*}}, ptr noundef nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %{{.*}}, i64 {{.*}}, i64 {{.*}})
+// CHECK-DAG: define weak_odr protected void @__omp_offloading_{{.*}}maini1{{.*}}_l[[@LINE+1]](ptr {{.*}}, ptr {{.*}})
#pragma omp target map(tofrom \
: a, b)
{
@@ -163,7 +163,7 @@ int maini1() {
int baz3() { return 2 + baz2(); }
int baz2() {
-// CHECK-DAG: define weak_odr protected void @__omp_offloading_{{.*}}baz2{{.*}}_l[[@LINE+1]](ptr {{.*}}, i64 {{.*}})
+// CHECK-DAG: define weak_odr protected void @__omp_offloading_{{.*}}baz2{{.*}}_l[[@LINE+1]](ptr {{.*}}, ptr {{.*}})
#pragma omp target parallel
++c;
return 2 + baz3();
@@ -175,7 +175,7 @@ static __typeof(create) __t_create __attribute__((__weakref__("__create")));
int baz5() {
bool a;
-// CHECK-DAG: define weak_odr protected void @__omp_offloading_{{.*}}baz5{{.*}}_l[[@LINE+1]](ptr {{.*}}, i64 {{.*}})
+// CHECK-DAG: define weak_odr protected void @__omp_offloading_{{.*}}baz5{{.*}}_l[[@LINE+1]](ptr {{.*}}, ptr {{.*}})
#pragma omp target
a = __extension__(void *) & __t_create != 0;
return a;
diff --git a/clang/test/OpenMP/declare_target_link_codegen.cpp b/clang/test/OpenMP/declare_target_link_codegen.cpp
index 189c9ac59c153c..ba63a4bc543476 100644
--- a/clang/test/OpenMP/declare_target_link_codegen.cpp
+++ b/clang/test/OpenMP/declare_target_link_codegen.cpp
@@ -52,7 +52,7 @@ int maini1() {
return 0;
}
-// DEVICE: define weak_odr protected void @__omp_offloading_{{.*}}_{{.*}}maini1{{.*}}_l44(ptr {{[^,]+}}, ptr noundef nonnull align {{[0-9]+}} dereferenceable{{[^,]*}}
+// DEVICE: define weak_odr protected void @__omp_offloading_{{.*}}_{{.*}}maini1{{.*}}_l44(ptr {{[^,]+}}, ptr {{[^,]*}}
// DEVICE: [[C_REF:%.+]] = load ptr, ptr @c_decl_tgt_ref_ptr,
// DEVICE: [[C:%.+]] = load i32, ptr [[C_REF]],
// DEVICE: store i32 [[C]], ptr %
diff --git a/clang/test/OpenMP/distribute_codegen.cpp b/clang/test/OpenMP/distribute_codegen.cpp
index 34d14c89fedaed..aaa28980839668 100644
--- a/clang/test/OpenMP/distribute_codegen.cpp
+++ b/clang/test/OpenMP/distribute_codegen.cpp
@@ -1947,19 +1947,18 @@ int fint(void) { return ftemplate<int>(); }
//
//
// CHECK17-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z23without_schedule_clausePfS_S_S__l56
-// CHECK17-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef [[A:%.*]], ptr noundef [[B:%.*]], ptr noundef [[C:%.*]], ptr noundef [[D:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK17-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK17-NEXT: entry:
// CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK17-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK17-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK17-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8
+// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8
// CHECK17-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK17-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK17-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8
-// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z23without_schedule_clausePfS_S_S__l56.omp_outlined, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]])
+// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8
+// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8
+// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1
+// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2
+// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3
+// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z23without_schedule_clausePfS_S_S__l56.omp_outlined, ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]])
// CHECK17-NEXT: ret void
//
//
@@ -2058,19 +2057,18 @@ int fint(void) { return ftemplate<int>(); }
//
//
// CHECK17-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z18static_not_chunkedPfS_S_S__l68
-// CHECK17-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef [[A:%.*]], ptr noundef [[B:%.*]], ptr noundef [[C:%.*]], ptr noundef [[D:%.*]]) #[[ATTR0]] {
+// CHECK17-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] {
// CHECK17-NEXT: entry:
// CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK17-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK17-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK17-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8
+// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8
// CHECK17-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK17-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK17-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8
-// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z18static_not_chunkedPfS_S_S__l68.omp_outlined, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]])
+// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8
+// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8
+// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1
+// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2
+// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3
+// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z18static_not_chunkedPfS_S_S__l68.omp_outlined, ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]])
// CHECK17-NEXT: ret void
//
//
@@ -2169,19 +2167,18 @@ int fint(void) { return ftemplate<int>(); }
//
//
// CHECK17-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14static_chunkedPfS_S_S__l80
-// CHECK17-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef [[A:%.*]], ptr noundef [[B:%.*]], ptr noundef [[C:%.*]], ptr noundef [[D:%.*]]) #[[ATTR0]] {
+// CHECK17-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] {
// CHECK17-NEXT: entry:
// CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK17-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK17-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK17-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8
+// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8
// CHECK17-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK17-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK17-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8
-// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14static_chunkedPfS_S_S__l80.omp_outlined, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]])
+// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8
+// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8
+// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1
+// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2
+// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3
+// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14static_chunkedPfS_S_S__l80.omp_outlined, ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]])
// CHECK17-NEXT: ret void
//
//
@@ -2297,13 +2294,18 @@ int fint(void) { return ftemplate<int>(); }
//
//
// CHECK17-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z12test_precondv_l92
-// CHECK17-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK17-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] {
// CHECK17-NEXT: entry:
// CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
+// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8
+// CHECK17-NEXT: [[A:%.*]] = alloca i8, align 1
// CHECK17-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK17-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
-// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z12test_precondv_l92.omp_outlined, ptr [[A_ADDR]])
+// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8
+// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8
+// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CHECK17-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1
+// CHECK17-NEXT: store i8 [[TMP2]], ptr [[A]], align 1
+// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z12test_precondv_l92.omp_outlined, ptr [[A]])
// CHECK17-NEXT: ret void
//
//
@@ -2401,13 +2403,18 @@ int fint(void) { return ftemplate<int>(); }
//
//
// CHECK17-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_v_l108
-// CHECK17-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR0]] {
+// CHECK17-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] {
// CHECK17-NEXT: entry:
// CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8
+// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8
+// CHECK17-NEXT: [[AA:%.*]] = alloca i16, align 2
// CHECK17-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK17-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8
-// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_v_l108.omp_outlined, ptr [[AA_ADDR]])
+// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8
+// C...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/91264
More information about the cfe-commits
mailing list