[llvm-branch-commits] [clang] [llvm] [Clang][OMPX] Add the code generation for multi-dim num_teams (PR #101407)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Jul 31 13:53:43 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang-codegen
Author: Shilei Tian (shiltian)
<details>
<summary>Changes</summary>
---
Patch is 228.66 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/101407.diff
4 Files Affected:
- (modified) clang/lib/CodeGen/CGOpenMPRuntime.cpp (+22-1)
- (modified) clang/test/OpenMP/target_teams_codegen.cpp (+1225-625)
- (modified) llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h (+5-5)
- (modified) llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp (+18-9)
``````````diff
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index f229202ae5535..7ddb5ed640ebc 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -9576,6 +9576,20 @@ static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF,
MappedVarSet, CombinedInfo);
genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder, MappedVarSet);
}
+
+static void emitNumTeamsForBareTargetDirective(
+ CodeGenFunction &CGF, const OMPExecutableDirective &D,
+ llvm::SmallVectorImpl<llvm::Value *> &NumTeams) {
+ const auto *C = D.getSingleClause<OMPNumTeamsClause>();
+ assert(!C->varlist_empty() && "ompx_bare requires explicit num_teams");
+ CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
+ for (auto *E : C->getNumTeams()) {
+ llvm::Value *V = CGF.EmitScalarExpr(E);
+ NumTeams.push_back(
+ CGF.Builder.CreateIntCast(V, CGF.Int32Ty, /*isSigned=*/true));
+ }
+}
+
static void emitTargetCallKernelLaunch(
CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
const OMPExecutableDirective &D,
@@ -9645,8 +9659,15 @@ static void emitTargetCallKernelLaunch(
return CGF.Builder.saveIP();
};
+ bool IsBare = D.hasClausesOfKind<OMPXBareClause>();
+ SmallVector<llvm::Value *, 3> NumTeams;
+ if (IsBare)
+ emitNumTeamsForBareTargetDirective(CGF, D, NumTeams);
+ else
+ NumTeams.push_back(OMPRuntime->emitNumTeamsForTargetDirective(CGF, D));
+
llvm::Value *DeviceID = emitDeviceID(Device, CGF);
- llvm::Value *NumTeams = OMPRuntime->emitNumTeamsForTargetDirective(CGF, D);
+ // llvm::Value *NumTeams = OMPRuntime->emitNumTeamsForTargetDirective(CGF, D);
llvm::Value *NumThreads =
OMPRuntime->emitNumThreadsForTargetDirective(CGF, D);
llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc());
diff --git a/clang/test/OpenMP/target_teams_codegen.cpp b/clang/test/OpenMP/target_teams_codegen.cpp
index 24dc2fd2e49f4..595740c1f1314 100644
--- a/clang/test/OpenMP/target_teams_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_codegen.cpp
@@ -127,6 +127,18 @@ int foo(int n) {
aa += 1;
}
+ #pragma omp target teams ompx_bare num_teams(1, 2) thread_limit(1)
+ {
+ a += 1;
+ aa += 1;
+ }
+
+ #pragma omp target teams ompx_bare num_teams(1, 2, 3) thread_limit(1)
+ {
+ a += 1;
+ aa += 1;
+ }
+
// We capture 3 VLA sizes in this target region
@@ -348,22 +360,34 @@ int bar(int n){
// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS20:%.*]] = alloca [2 x ptr], align 8
// CHECK1-NEXT: [[KERNEL_ARGS21:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
// CHECK1-NEXT: [[A_CASTED24:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS27:%.*]] = alloca [9 x ptr], align 8
-// CHECK1-NEXT: [[DOTOFFLOAD_PTRS28:%.*]] = alloca [9 x ptr], align 8
-// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS29:%.*]] = alloca [9 x ptr], align 8
+// CHECK1-NEXT: [[AA_CASTED25:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS26:%.*]] = alloca [2 x ptr], align 8
+// CHECK1-NEXT: [[DOTOFFLOAD_PTRS27:%.*]] = alloca [2 x ptr], align 8
+// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS28:%.*]] = alloca [2 x ptr], align 8
+// CHECK1-NEXT: [[KERNEL_ARGS29:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK1-NEXT: [[A_CASTED32:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[AA_CASTED33:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS34:%.*]] = alloca [2 x ptr], align 8
+// CHECK1-NEXT: [[DOTOFFLOAD_PTRS35:%.*]] = alloca [2 x ptr], align 8
+// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS36:%.*]] = alloca [2 x ptr], align 8
+// CHECK1-NEXT: [[KERNEL_ARGS37:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK1-NEXT: [[A_CASTED40:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS43:%.*]] = alloca [9 x ptr], align 8
+// CHECK1-NEXT: [[DOTOFFLOAD_PTRS44:%.*]] = alloca [9 x ptr], align 8
+// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS45:%.*]] = alloca [9 x ptr], align 8
// CHECK1-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [9 x i64], align 8
-// CHECK1-NEXT: [[KERNEL_ARGS30:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK1-NEXT: [[KERNEL_ARGS46:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
// CHECK1-NEXT: [[NN:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[NN_CASTED:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS35:%.*]] = alloca [1 x ptr], align 8
-// CHECK1-NEXT: [[DOTOFFLOAD_PTRS36:%.*]] = alloca [1 x ptr], align 8
-// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS37:%.*]] = alloca [1 x ptr], align 8
-// CHECK1-NEXT: [[KERNEL_ARGS38:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
-// CHECK1-NEXT: [[NN_CASTED41:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS42:%.*]] = alloca [1 x ptr], align 8
-// CHECK1-NEXT: [[DOTOFFLOAD_PTRS43:%.*]] = alloca [1 x ptr], align 8
-// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS44:%.*]] = alloca [1 x ptr], align 8
-// CHECK1-NEXT: [[KERNEL_ARGS45:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS51:%.*]] = alloca [1 x ptr], align 8
+// CHECK1-NEXT: [[DOTOFFLOAD_PTRS52:%.*]] = alloca [1 x ptr], align 8
+// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS53:%.*]] = alloca [1 x ptr], align 8
+// CHECK1-NEXT: [[KERNEL_ARGS54:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK1-NEXT: [[NN_CASTED57:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS58:%.*]] = alloca [1 x ptr], align 8
+// CHECK1-NEXT: [[DOTOFFLOAD_PTRS59:%.*]] = alloca [1 x ptr], align 8
+// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS60:%.*]] = alloca [1 x ptr], align 8
+// CHECK1-NEXT: [[KERNEL_ARGS61:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK1-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK1-NEXT: store i32 0, ptr [[A]], align 4
@@ -603,206 +627,312 @@ int bar(int n){
// CHECK1-NEXT: [[TMP122:%.*]] = load i32, ptr [[A]], align 4
// CHECK1-NEXT: store i32 [[TMP122]], ptr [[A_CASTED24]], align 4
// CHECK1-NEXT: [[TMP123:%.*]] = load i64, ptr [[A_CASTED24]], align 8
-// CHECK1-NEXT: [[TMP124:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK1-NEXT: [[CMP25:%.*]] = icmp sgt i32 [[TMP124]], 20
-// CHECK1-NEXT: br i1 [[CMP25]], label [[OMP_IF_THEN26:%.*]], label [[OMP_IF_ELSE33:%.*]]
-// CHECK1: omp_if.then26:
-// CHECK1-NEXT: [[TMP125:%.*]] = mul nuw i64 [[TMP2]], 4
-// CHECK1-NEXT: [[TMP126:%.*]] = mul nuw i64 5, [[TMP5]]
-// CHECK1-NEXT: [[TMP127:%.*]] = mul nuw i64 [[TMP126]], 8
-// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DOTOFFLOAD_SIZES]], ptr align 8 @.offload_sizes.7, i64 72, i1 false)
-// CHECK1-NEXT: [[TMP128:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0
-// CHECK1-NEXT: store i64 [[TMP123]], ptr [[TMP128]], align 8
-// CHECK1-NEXT: [[TMP129:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 0
-// CHECK1-NEXT: store i64 [[TMP123]], ptr [[TMP129]], align 8
-// CHECK1-NEXT: [[TMP130:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS29]], i64 0, i64 0
-// CHECK1-NEXT: store ptr null, ptr [[TMP130]], align 8
-// CHECK1-NEXT: [[TMP131:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 1
-// CHECK1-NEXT: store ptr [[B]], ptr [[TMP131]], align 8
-// CHECK1-NEXT: [[TMP132:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 1
-// CHECK1-NEXT: store ptr [[B]], ptr [[TMP132]], align 8
-// CHECK1-NEXT: [[TMP133:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS29]], i64 0, i64 1
-// CHECK1-NEXT: store ptr null, ptr [[TMP133]], align 8
-// CHECK1-NEXT: [[TMP134:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 2
-// CHECK1-NEXT: store i64 [[TMP2]], ptr [[TMP134]], align 8
-// CHECK1-NEXT: [[TMP135:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 2
-// CHECK1-NEXT: store i64 [[TMP2]], ptr [[TMP135]], align 8
-// CHECK1-NEXT: [[TMP136:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS29]], i64 0, i64 2
-// CHECK1-NEXT: store ptr null, ptr [[TMP136]], align 8
-// CHECK1-NEXT: [[TMP137:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 3
-// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP137]], align 8
-// CHECK1-NEXT: [[TMP138:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 3
-// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP138]], align 8
-// CHECK1-NEXT: [[TMP139:%.*]] = getelementptr inbounds [9 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 3
-// CHECK1-NEXT: store i64 [[TMP125]], ptr [[TMP139]], align 8
-// CHECK1-NEXT: [[TMP140:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS29]], i64 0, i64 3
+// CHECK1-NEXT: [[TMP124:%.*]] = load i16, ptr [[AA]], align 2
+// CHECK1-NEXT: store i16 [[TMP124]], ptr [[AA_CASTED25]], align 2
+// CHECK1-NEXT: [[TMP125:%.*]] = load i64, ptr [[AA_CASTED25]], align 8
+// CHECK1-NEXT: [[TMP126:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 0
+// CHECK1-NEXT: store i64 [[TMP123]], ptr [[TMP126]], align 8
+// CHECK1-NEXT: [[TMP127:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS27]], i32 0, i32 0
+// CHECK1-NEXT: store i64 [[TMP123]], ptr [[TMP127]], align 8
+// CHECK1-NEXT: [[TMP128:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 0
+// CHECK1-NEXT: store ptr null, ptr [[TMP128]], align 8
+// CHECK1-NEXT: [[TMP129:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 1
+// CHECK1-NEXT: store i64 [[TMP125]], ptr [[TMP129]], align 8
+// CHECK1-NEXT: [[TMP130:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS27]], i32 0, i32 1
+// CHECK1-NEXT: store i64 [[TMP125]], ptr [[TMP130]], align 8
+// CHECK1-NEXT: [[TMP131:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 1
+// CHECK1-NEXT: store ptr null, ptr [[TMP131]], align 8
+// CHECK1-NEXT: [[TMP132:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP133:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS27]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP134:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 0
+// CHECK1-NEXT: store i32 3, ptr [[TMP134]], align 4
+// CHECK1-NEXT: [[TMP135:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 1
+// CHECK1-NEXT: store i32 2, ptr [[TMP135]], align 4
+// CHECK1-NEXT: [[TMP136:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 2
+// CHECK1-NEXT: store ptr [[TMP132]], ptr [[TMP136]], align 8
+// CHECK1-NEXT: [[TMP137:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 3
+// CHECK1-NEXT: store ptr [[TMP133]], ptr [[TMP137]], align 8
+// CHECK1-NEXT: [[TMP138:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 4
+// CHECK1-NEXT: store ptr @.offload_sizes.7, ptr [[TMP138]], align 8
+// CHECK1-NEXT: [[TMP139:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 5
+// CHECK1-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP139]], align 8
+// CHECK1-NEXT: [[TMP140:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 6
// CHECK1-NEXT: store ptr null, ptr [[TMP140]], align 8
-// CHECK1-NEXT: [[TMP141:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 4
-// CHECK1-NEXT: store ptr [[C]], ptr [[TMP141]], align 8
-// CHECK1-NEXT: [[TMP142:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 4
-// CHECK1-NEXT: store ptr [[C]], ptr [[TMP142]], align 8
-// CHECK1-NEXT: [[TMP143:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS29]], i64 0, i64 4
-// CHECK1-NEXT: store ptr null, ptr [[TMP143]], align 8
-// CHECK1-NEXT: [[TMP144:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 5
-// CHECK1-NEXT: store i64 5, ptr [[TMP144]], align 8
-// CHECK1-NEXT: [[TMP145:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 5
-// CHECK1-NEXT: store i64 5, ptr [[TMP145]], align 8
-// CHECK1-NEXT: [[TMP146:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS29]], i64 0, i64 5
-// CHECK1-NEXT: store ptr null, ptr [[TMP146]], align 8
-// CHECK1-NEXT: [[TMP147:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 6
-// CHECK1-NEXT: store i64 [[TMP5]], ptr [[TMP147]], align 8
-// CHECK1-NEXT: [[TMP148:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 6
-// CHECK1-NEXT: store i64 [[TMP5]], ptr [[TMP148]], align 8
-// CHECK1-NEXT: [[TMP149:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS29]], i64 0, i64 6
-// CHECK1-NEXT: store ptr null, ptr [[TMP149]], align 8
-// CHECK1-NEXT: [[TMP150:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 7
-// CHECK1-NEXT: store ptr [[VLA1]], ptr [[TMP150]], align 8
-// CHECK1-NEXT: [[TMP151:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 7
-// CHECK1-NEXT: store ptr [[VLA1]], ptr [[TMP151]], align 8
-// CHECK1-NEXT: [[TMP152:%.*]] = getelementptr inbounds [9 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 7
-// CHECK1-NEXT: store i64 [[TMP127]], ptr [[TMP152]], align 8
-// CHECK1-NEXT: [[TMP153:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS29]], i64 0, i64 7
-// CHECK1-NEXT: store ptr null, ptr [[TMP153]], align 8
-// CHECK1-NEXT: [[TMP154:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 8
-// CHECK1-NEXT: store ptr [[D]], ptr [[TMP154]], align 8
-// CHECK1-NEXT: [[TMP155:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 8
-// CHECK1-NEXT: store ptr [[D]], ptr [[TMP155]], align 8
-// CHECK1-NEXT: [[TMP156:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS29]], i64 0, i64 8
-// CHECK1-NEXT: store ptr null, ptr [[TMP156]], align 8
-// CHECK1-NEXT: [[TMP157:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP158:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP159:%.*]] = getelementptr inbounds [9 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP160:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 0
-// CHECK1-NEXT: store i32 3, ptr [[TMP160]], align 4
-// CHECK1-NEXT: [[TMP161:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 1
-// CHECK1-NEXT: store i32 9, ptr [[TMP161]], align 4
-// CHECK1-NEXT: [[TMP162:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 2
-// CHECK1-NEXT: store ptr [[TMP157]], ptr [[TMP162]], align 8
-// CHECK1-NEXT: [[TMP163:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 3
-// CHECK1-NEXT: store ptr [[TMP158]], ptr [[TMP163]], align 8
-// CHECK1-NEXT: [[TMP164:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 4
-// CHECK1-NEXT: store ptr [[TMP159]], ptr [[TMP164]], align 8
-// CHECK1-NEXT: [[TMP165:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 5
-// CHECK1-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP165]], align 8
-// CHECK1-NEXT: [[TMP166:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 6
-// CHECK1-NEXT: store ptr null, ptr [[TMP166]], align 8
-// CHECK1-NEXT: [[TMP167:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 7
+// CHECK1-NEXT: [[TMP141:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 7
+// CHECK1-NEXT: store ptr null, ptr [[TMP141]], align 8
+// CHECK1-NEXT: [[TMP142:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 8
+// CHECK1-NEXT: store i64 0, ptr [[TMP142]], align 8
+// CHECK1-NEXT: [[TMP143:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 9
+// CHECK1-NEXT: store i64 0, ptr [[TMP143]], align 8
+// CHECK1-NEXT: [[TMP144:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 10
+// CHECK1-NEXT: store [3 x i32] [i32 1, i32 2, i32 0], ptr [[TMP144]], align 4
+// CHECK1-NEXT: [[TMP145:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 11
+// CHECK1-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP145]], align 4
+// CHECK1-NEXT: [[TMP146:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 12
+// CHECK1-NEXT: store i32 0, ptr [[TMP146]], align 4
+// CHECK1-NEXT: [[TMP147:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l130.region_id, ptr [[KERNEL_ARGS29]])
+// CHECK1-NEXT: [[TMP148:%.*]] = icmp ne i32 [[TMP147]], 0
+// CHECK1-NEXT: br i1 [[TMP148]], label [[OMP_OFFLOAD_FAILED30:%.*]], label [[OMP_OFFLOAD_CONT31:%.*]]
+// CHECK1: omp_offload.failed30:
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l130(i64 [[TMP123]], i64 [[TMP125]]) #[[ATTR3]]
+// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT31]]
+// CHECK1: omp_offload.cont31:
+// CHECK1-NEXT: [[TMP149:%.*]] = load i32, ptr [[A]], align 4
+// CHECK1-NEXT: store i32 [[TMP149]], ptr [[A_CASTED32]], align 4
+// CHECK1-NEXT: [[TMP150:%.*]] = load i64, ptr [[A_CASTED32]], align 8
+// CHECK1-NEXT: [[TMP151:%.*]] = load i16, ptr [[AA]], align 2
+// CHECK1-NEXT: store i16 [[TMP151]], ptr [[AA_CASTED33]], align 2
+// CHECK1-NEXT: [[TMP152:%.*]] = load i64, ptr [[AA_CASTED33]], align 8
+// CHECK1-NEXT: [[TMP153:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS34]], i32 0, i32 0
+// CHECK1-NEXT: store i64 [[TMP150]], ptr [[TMP153]], align 8
+// CHECK1-NEXT: [[TMP154:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS35]], i32 0, i32 0
+// CHECK1-NEXT: store i64 [[TMP150]], ptr [[TMP154]], align 8
+// CHECK1-NEXT: [[TMP155:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS36]], i64 0, i64 0
+// CHECK1-NEXT: store ptr null, ptr [[TMP155]], align 8
+// CHECK1-NEXT: [[TMP156:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS34]], i32 0, i32 1
+// CHECK1-NEXT: store i64 [[TMP152]], ptr [[TMP156]], align 8
+// CHECK1-NEXT: [[TMP157:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS35]], i32 0, i32 1
+// CHECK1-NEXT: store i64 [[TMP152]], ptr [[TMP157]], align 8
+// CHECK1-NEXT: [[TMP158:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS36]], i64 0, i64 1
+// CHECK1-NEXT: store ptr null, ptr [[TMP158]], align 8
+// CHECK1-NEXT: [[TMP159:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS34]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP160:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS35]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP161:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS37]], i32 0, i32 0
+// CHECK1-NEXT: store i32 3, ptr [[TMP161]], align 4
+// CHECK1-NEXT: [[TMP162:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KE...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/101407
More information about the llvm-branch-commits
mailing list