[clang] [llvm] [WIP][OpenMP] Remove dependency on `libffi` from offloading runtime (PR #91264)

via cfe-commits cfe-commits at lists.llvm.org
Mon May 6 12:58:47 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang-codegen

Author: Joseph Huber (jhuber6)

<details>
<summary>Changes</summary>

Summary:
This patch attempts to remove the dependency on `libffi` by instead
emitting the host / CPU kernels using an aggregate struct made from the
captured context. This callows us to have a fixed function prototype we
can call directly rather than requiring an extra library to decode the
ABI to call a function with N (non variadic) arguments.

NOTE:
  This currently fails for tests using a non-constant value for
  `num_teams` on the CPU. It seems that these use a method called
  `CGF.EmitScalarExpr(NumTeams)` which doesn't seem to work correctly
  with the created aggregate struct.


---

Patch is 1.07 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/91264.diff


38 Files Affected:

- (modified) clang/lib/CodeGen/CGOpenMPRuntime.cpp (+6-2) 
- (modified) clang/lib/CodeGen/CGStmtOpenMP.cpp (+126) 
- (modified) clang/lib/CodeGen/CodeGenFunction.h (+3) 
- (modified) clang/test/OpenMP/declare_target_codegen.cpp (+3-3) 
- (modified) clang/test/OpenMP/declare_target_link_codegen.cpp (+1-1) 
- (modified) clang/test/OpenMP/distribute_codegen.cpp (+90-76) 
- (modified) clang/test/OpenMP/distribute_simd_codegen.cpp (+196-160) 
- (modified) clang/test/OpenMP/openmp_offload_codegen.cpp (+1-1) 
- (modified) clang/test/OpenMP/target_firstprivate_codegen.cpp (+704-644) 
- (modified) clang/test/OpenMP/target_ompx_dyn_cgroup_mem_codegen.cpp (+170-102) 
- (modified) clang/test/OpenMP/target_parallel_codegen.cpp (+264-210) 
- (modified) clang/test/OpenMP/target_parallel_for_codegen.cpp (+306-240) 
- (modified) clang/test/OpenMP/target_parallel_for_simd_codegen.cpp (+638-498) 
- (modified) clang/test/OpenMP/target_parallel_generic_loop_codegen-2.cpp (+48-28) 
- (modified) clang/test/OpenMP/target_parallel_if_codegen.cpp (+178-106) 
- (modified) clang/test/OpenMP/target_parallel_num_threads_codegen.cpp (+154-94) 
- (modified) clang/test/OpenMP/target_private_codegen.cpp (+361-116) 
- (modified) clang/test/OpenMP/target_reduction_codegen.cpp (+290-106) 
- (modified) clang/test/OpenMP/target_task_affinity_codegen.cpp (+72-70) 
- (modified) clang/test/OpenMP/target_teams_codegen.cpp (+402-298) 
- (modified) clang/test/OpenMP/target_teams_distribute_codegen.cpp (+330-256) 
- (modified) clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp (+82-60) 
- (modified) clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_codegen.cpp (+160-139) 
- (modified) clang/test/OpenMP/target_teams_distribute_parallel_for_private_codegen.cpp (+56-41) 
- (modified) clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp (+110-80) 
- (modified) clang/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_codegen.cpp (+160-139) 
- (modified) clang/test/OpenMP/target_teams_distribute_parallel_for_simd_private_codegen.cpp (+56-41) 
- (modified) clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp (+652-504) 
- (modified) clang/test/OpenMP/target_teams_generic_loop_codegen-1.cpp (+82-60) 
- (modified) clang/test/OpenMP/target_teams_generic_loop_private_codegen.cpp (+40-25) 
- (modified) clang/test/OpenMP/target_teams_map_codegen.cpp (+170-142) 
- (modified) clang/test/OpenMP/target_teams_num_teams_codegen.cpp (+154-94) 
- (modified) clang/test/OpenMP/target_teams_thread_limit_codegen.cpp (+164-100) 
- (modified) clang/test/OpenMP/teams_codegen.cpp (+104-60) 
- (modified) offload/plugins-nextgen/host/CMakeLists.txt (-13) 
- (removed) offload/plugins-nextgen/host/dynamic_ffi/ffi.cpp (-75) 
- (removed) offload/plugins-nextgen/host/dynamic_ffi/ffi.h (-78) 
- (modified) offload/plugins-nextgen/host/src/rtl.cpp (+23-18) 


``````````diff
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index e39c7c58d2780e..3cd4bcff2f5852 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -5932,12 +5932,16 @@ void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
 
   CodeGenFunction CGF(CGM, true);
   llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
-      [&CGF, &D, &CodeGen](StringRef EntryFnName) {
+      [&CGF, &D, &CodeGen, this](StringRef EntryFnName) {
         const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
 
         CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
         CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
-        return CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
+        if (CGM.getLangOpts().OpenMPIsTargetDevice && !isGPU())
+          return CGF.GenerateOpenMPCapturedStmtFunctionAggregate(
+              CS, D.getBeginLoc());
+        else
+          return CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
       };
 
   OMPBuilder.emitTargetRegionFunction(EntryInfo, GenerateOutlinedFunction,
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index ef3aa3a8e0dc61..b9d27815a8ae24 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -613,6 +613,102 @@ static llvm::Function *emitOutlinedFunctionPrologue(
   return F;
 }
 
+static llvm::Function *emitOutlinedFunctionPrologueAggregate(
+    CodeGenFunction &CGF, FunctionArgList &Args,
+    llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
+        &LocalAddrs,
+    llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
+        &VLASizes,
+    llvm::Value *&CXXThisValue, const CapturedStmt &CS, SourceLocation Loc,
+    StringRef FunctionName) {
+  const CapturedDecl *CD = CS.getCapturedDecl();
+  const RecordDecl *RD = CS.getCapturedRecordDecl();
+
+  CXXThisValue = nullptr;
+  // Build the argument list.
+  CodeGenModule &CGM = CGF.CGM;
+  ASTContext &Ctx = CGM.getContext();
+  Args.append(CD->param_begin(), CD->param_end());
+
+  // Create the function declaration.
+  const CGFunctionInfo &FuncInfo =
+      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args);
+  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);
+
+  auto *F =
+      llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
+                             FunctionName, &CGM.getModule());
+  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
+  if (CD->isNothrow())
+    F->setDoesNotThrow();
+  F->setDoesNotRecurse();
+
+  // Generate the function.
+  CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, Loc, Loc);
+  Address ContextAddr = CGF.GetAddrOfLocalVar(CD->getContextParam());
+  llvm::Value *ContextV = CGF.Builder.CreateLoad(ContextAddr);
+  LValue ContextLV = CGF.MakeNaturalAlignAddrLValue(
+      ContextV, CGM.getContext().getTagDeclType(RD));
+  auto I = CS.captures().begin();
+  for (const FieldDecl *FD : RD->fields()) {
+    LValue FieldLV = CGF.EmitLValueForFieldInitialization(ContextLV, FD);
+    // Do not map arguments if we emit function with non-original types.
+    Address LocalAddr = FieldLV.getAddress(CGF);
+    // If we are capturing a pointer by copy we don't need to do anything, just
+    // use the value that we get from the arguments.
+    if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
+      const VarDecl *CurVD = I->getCapturedVar();
+      LocalAddrs.insert({FD, {CurVD, LocalAddr}});
+      ++I;
+      continue;
+    }
+
+    LValue ArgLVal =
+        CGF.MakeAddrLValue(LocalAddr, FD->getType(), AlignmentSource::Decl);
+    if (FD->hasCapturedVLAType()) {
+      llvm::Value *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
+      const VariableArrayType *VAT = FD->getCapturedVLAType();
+      VLASizes.try_emplace(FD, VAT->getSizeExpr(), ExprArg);
+    } else if (I->capturesVariable()) {
+      const VarDecl *Var = I->getCapturedVar();
+      QualType VarTy = Var->getType();
+      Address ArgAddr = ArgLVal.getAddress(CGF);
+      if (ArgLVal.getType()->isLValueReferenceType()) {
+        ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
+      } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
+        assert(ArgLVal.getType()->isPointerType());
+        ArgAddr = CGF.EmitLoadOfPointer(
+            ArgAddr, ArgLVal.getType()->castAs<PointerType>());
+      }
+      LocalAddrs.insert(
+          {FD,
+           {Var, Address(ArgAddr.getBasePointer(), ArgAddr.getElementType(),
+                         Ctx.getDeclAlign(Var))}});
+    } else if (I->capturesVariableByCopy()) {
+      assert(!FD->getType()->isAnyPointerType() &&
+             "Not expecting a captured pointer.");
+      const VarDecl *Var = I->getCapturedVar();
+      Address CopyAddr = CGF.CreateMemTemp(FD->getType(), Ctx.getDeclAlign(FD),
+                                           Var->getName());
+      LValue CopyLVal =
+          CGF.MakeAddrLValue(CopyAddr, FD->getType(), AlignmentSource::Decl);
+
+      RValue ArgRVal = CGF.EmitLoadOfLValue(ArgLVal, I->getLocation());
+      CGF.EmitStoreThroughLValue(ArgRVal, CopyLVal);
+
+      LocalAddrs.insert({FD, {Var, CopyAddr}});
+    } else {
+      // If 'this' is captured, load it into CXXThisValue.
+      assert(I->capturesThis());
+      CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
+      LocalAddrs.insert({FD, {nullptr, ArgLVal.getAddress(CGF)}});
+    }
+    ++I;
+  }
+
+  return F;
+}
+
 llvm::Function *
 CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
                                                     SourceLocation Loc) {
@@ -695,6 +791,36 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
   return WrapperF;
 }
 
+llvm::Function *CodeGenFunction::GenerateOpenMPCapturedStmtFunctionAggregate(
+    const CapturedStmt &S, SourceLocation Loc) {
+  assert(
+      CapturedStmtInfo &&
+      "CapturedStmtInfo should be set when generating the captured function");
+  const CapturedDecl *CD = S.getCapturedDecl();
+  // Build the argument list.
+  FunctionArgList Args;
+  llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
+  llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
+  StringRef FunctionName = CapturedStmtInfo->getHelperName();
+  llvm::Function *F = emitOutlinedFunctionPrologueAggregate(
+      *this, Args, LocalAddrs, VLASizes, CXXThisValue, S, Loc, FunctionName);
+  CodeGenFunction::OMPPrivateScope LocalScope(*this);
+  for (const auto &LocalAddrPair : LocalAddrs) {
+    if (LocalAddrPair.second.first) {
+      LocalScope.addPrivate(LocalAddrPair.second.first,
+                            LocalAddrPair.second.second);
+    }
+  }
+  (void)LocalScope.Privatize();
+  for (const auto &VLASizePair : VLASizes)
+    VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
+  PGO.assignRegionCounters(GlobalDecl(CD), F);
+  CapturedStmtInfo->EmitBody(*this, CD->getBody());
+  (void)LocalScope.ForceCleanup();
+  FinishFunction(CD->getBodyRBrace());
+  return F;
+}
+
 //===----------------------------------------------------------------------===//
 //                              OpenMP Directive Emission
 //===----------------------------------------------------------------------===//
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index e1e687af6a781b..4ad4b96767f795 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -3639,6 +3639,9 @@ class CodeGenFunction : public CodeGenTypeCache {
   Address GenerateCapturedStmtArgument(const CapturedStmt &S);
   llvm::Function *GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
                                                      SourceLocation Loc);
+  llvm::Function *
+  GenerateOpenMPCapturedStmtFunctionAggregate(const CapturedStmt &S,
+                                              SourceLocation Loc);
   void GenerateOpenMPCapturedVars(const CapturedStmt &S,
                                   SmallVectorImpl<llvm::Value *> &CapturedVars);
   void emitOMPSimpleStore(LValue LVal, RValue RVal, QualType RValTy,
diff --git a/clang/test/OpenMP/declare_target_codegen.cpp b/clang/test/OpenMP/declare_target_codegen.cpp
index ba93772ede3e8e..81116c6617b5bd 100644
--- a/clang/test/OpenMP/declare_target_codegen.cpp
+++ b/clang/test/OpenMP/declare_target_codegen.cpp
@@ -150,7 +150,7 @@ int bar() { return 1 + foo() + bar() + baz1() + baz2(); }
 int maini1() {
   int a;
   static long aa = 32 + bbb + ccc + fff + ggg;
-// CHECK-DAG: define weak_odr protected void @__omp_offloading_{{.*}}maini1{{.*}}_l[[@LINE+1]](ptr {{.*}}, ptr noundef nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %{{.*}}, i64 {{.*}}, i64 {{.*}})
+// CHECK-DAG: define weak_odr protected void @__omp_offloading_{{.*}}maini1{{.*}}_l[[@LINE+1]](ptr {{.*}}, ptr {{.*}})
 #pragma omp target map(tofrom \
                        : a, b)
   {
@@ -163,7 +163,7 @@ int maini1() {
 
 int baz3() { return 2 + baz2(); }
 int baz2() {
-// CHECK-DAG: define weak_odr protected void @__omp_offloading_{{.*}}baz2{{.*}}_l[[@LINE+1]](ptr {{.*}}, i64 {{.*}})
+// CHECK-DAG: define weak_odr protected void @__omp_offloading_{{.*}}baz2{{.*}}_l[[@LINE+1]](ptr {{.*}}, ptr {{.*}})
 #pragma omp target parallel
   ++c;
   return 2 + baz3();
@@ -175,7 +175,7 @@ static __typeof(create) __t_create __attribute__((__weakref__("__create")));
 
 int baz5() {
   bool a;
-// CHECK-DAG: define weak_odr protected void @__omp_offloading_{{.*}}baz5{{.*}}_l[[@LINE+1]](ptr {{.*}}, i64 {{.*}})
+// CHECK-DAG: define weak_odr protected void @__omp_offloading_{{.*}}baz5{{.*}}_l[[@LINE+1]](ptr {{.*}}, ptr {{.*}})
 #pragma omp target
   a = __extension__(void *) & __t_create != 0;
   return a;
diff --git a/clang/test/OpenMP/declare_target_link_codegen.cpp b/clang/test/OpenMP/declare_target_link_codegen.cpp
index 189c9ac59c153c..ba63a4bc543476 100644
--- a/clang/test/OpenMP/declare_target_link_codegen.cpp
+++ b/clang/test/OpenMP/declare_target_link_codegen.cpp
@@ -52,7 +52,7 @@ int maini1() {
   return 0;
 }
 
-// DEVICE: define weak_odr protected void @__omp_offloading_{{.*}}_{{.*}}maini1{{.*}}_l44(ptr {{[^,]+}}, ptr noundef nonnull align {{[0-9]+}} dereferenceable{{[^,]*}}
+// DEVICE: define weak_odr protected void @__omp_offloading_{{.*}}_{{.*}}maini1{{.*}}_l44(ptr {{[^,]+}}, ptr {{[^,]*}}
 // DEVICE: [[C_REF:%.+]] = load ptr, ptr @c_decl_tgt_ref_ptr,
 // DEVICE: [[C:%.+]] = load i32, ptr [[C_REF]],
 // DEVICE: store i32 [[C]], ptr %
diff --git a/clang/test/OpenMP/distribute_codegen.cpp b/clang/test/OpenMP/distribute_codegen.cpp
index 34d14c89fedaed..aaa28980839668 100644
--- a/clang/test/OpenMP/distribute_codegen.cpp
+++ b/clang/test/OpenMP/distribute_codegen.cpp
@@ -1947,19 +1947,18 @@ int fint(void) { return ftemplate<int>(); }
 //
 //
 // CHECK17-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z23without_schedule_clausePfS_S_S__l56
-// CHECK17-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef [[A:%.*]], ptr noundef [[B:%.*]], ptr noundef [[C:%.*]], ptr noundef [[D:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK17-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK17-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK17-NEXT:    [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK17-NEXT:    [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK17-NEXT:    [[D_ADDR:%.*]] = alloca ptr, align 8
+// CHECK17-NEXT:    [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8
 // CHECK17-NEXT:    store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK17-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK17-NEXT:    store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK17-NEXT:    store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK17-NEXT:    store ptr [[D]], ptr [[D_ADDR]], align 8
-// CHECK17-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z23without_schedule_clausePfS_S_S__l56.omp_outlined, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]])
+// CHECK17-NEXT:    store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8
+// CHECK17-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8
+// CHECK17-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1
+// CHECK17-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2
+// CHECK17-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3
+// CHECK17-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z23without_schedule_clausePfS_S_S__l56.omp_outlined, ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]])
 // CHECK17-NEXT:    ret void
 //
 //
@@ -2058,19 +2057,18 @@ int fint(void) { return ftemplate<int>(); }
 //
 //
 // CHECK17-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z18static_not_chunkedPfS_S_S__l68
-// CHECK17-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef [[A:%.*]], ptr noundef [[B:%.*]], ptr noundef [[C:%.*]], ptr noundef [[D:%.*]]) #[[ATTR0]] {
+// CHECK17-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK17-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK17-NEXT:    [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK17-NEXT:    [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK17-NEXT:    [[D_ADDR:%.*]] = alloca ptr, align 8
+// CHECK17-NEXT:    [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8
 // CHECK17-NEXT:    store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK17-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK17-NEXT:    store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK17-NEXT:    store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK17-NEXT:    store ptr [[D]], ptr [[D_ADDR]], align 8
-// CHECK17-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z18static_not_chunkedPfS_S_S__l68.omp_outlined, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]])
+// CHECK17-NEXT:    store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8
+// CHECK17-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8
+// CHECK17-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1
+// CHECK17-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2
+// CHECK17-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3
+// CHECK17-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z18static_not_chunkedPfS_S_S__l68.omp_outlined, ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]])
 // CHECK17-NEXT:    ret void
 //
 //
@@ -2169,19 +2167,18 @@ int fint(void) { return ftemplate<int>(); }
 //
 //
 // CHECK17-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14static_chunkedPfS_S_S__l80
-// CHECK17-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef [[A:%.*]], ptr noundef [[B:%.*]], ptr noundef [[C:%.*]], ptr noundef [[D:%.*]]) #[[ATTR0]] {
+// CHECK17-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK17-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK17-NEXT:    [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK17-NEXT:    [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK17-NEXT:    [[D_ADDR:%.*]] = alloca ptr, align 8
+// CHECK17-NEXT:    [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8
 // CHECK17-NEXT:    store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK17-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK17-NEXT:    store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK17-NEXT:    store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK17-NEXT:    store ptr [[D]], ptr [[D_ADDR]], align 8
-// CHECK17-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14static_chunkedPfS_S_S__l80.omp_outlined, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]])
+// CHECK17-NEXT:    store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8
+// CHECK17-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8
+// CHECK17-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1
+// CHECK17-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2
+// CHECK17-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3
+// CHECK17-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14static_chunkedPfS_S_S__l80.omp_outlined, ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]])
 // CHECK17-NEXT:    ret void
 //
 //
@@ -2297,13 +2294,18 @@ int fint(void) { return ftemplate<int>(); }
 //
 //
 // CHECK17-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z12test_precondv_l92
-// CHECK17-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK17-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK17-NEXT:    [[A_ADDR:%.*]] = alloca i64, align 8
+// CHECK17-NEXT:    [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8
+// CHECK17-NEXT:    [[A:%.*]] = alloca i8, align 1
 // CHECK17-NEXT:    store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK17-NEXT:    store i64 [[A]], ptr [[A_ADDR]], align 8
-// CHECK17-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z12test_precondv_l92.omp_outlined, ptr [[A_ADDR]])
+// CHECK17-NEXT:    store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8
+// CHECK17-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8
+// CHECK17-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CHECK17-NEXT:    [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1
+// CHECK17-NEXT:    store i8 [[TMP2]], ptr [[A]], align 1
+// CHECK17-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z12test_precondv_l92.omp_outlined, ptr [[A]])
 // CHECK17-NEXT:    ret void
 //
 //
@@ -2401,13 +2403,18 @@ int fint(void) { return ftemplate<int>(); }
 //
 //
 // CHECK17-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_v_l108
-// CHECK17-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR0]] {
+// CHECK17-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] {
 // CHECK17-NEXT:  entry:
 // CHECK17-NEXT:    [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK17-NEXT:    [[AA_ADDR:%.*]] = alloca i64, align 8
+// CHECK17-NEXT:    [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8
+// CHECK17-NEXT:    [[AA:%.*]] = alloca i16, align 2
 // CHECK17-NEXT:    store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK17-NEXT:    store i64 [[AA]], ptr [[AA_ADDR]], align 8
-// CHECK17-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_v_l108.omp_outlined, ptr [[AA_ADDR]])
+// CHECK17-NEXT:    store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8
+// C...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/91264


More information about the cfe-commits mailing list